From 930fcbe956efd2362400a04ae463fb59b3dfc88e Mon Sep 17 00:00:00 2001 From: cturek Date: Sat, 10 Dec 2022 21:56:35 +0000 Subject: [PATCH 1/5] Fixed D sizing issues across fdivsqrt. Fixed preproc to accept either int or float inputs --- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 12 ++-- pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv | 72 +++++++++---------- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 14 ++-- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 56 +++++++-------- pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv | 4 +- pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv | 6 +- pipelined/testbench/tests.vh | 1 + 7 files changed, 78 insertions(+), 87 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 63ffab713..bdbfbde00 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -56,11 +56,11 @@ module fdivsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVb+3:0] WS, WC; + logic [`DIVb+3:0] WS, WC; logic [`DIVb+3:0] X; - logic [`DIVN-2:0] D; // U0.N-1 - logic [`DIVN-2:0] Dpreproc; - logic [`DIVb:0] FirstU, FirstUM; + logic [`DIVb-1:0] D; + logic [`DIVb-1:0] DPreproc; + logic [`DIVb:0] FirstU, FirstUM; logic [`DIVb+1:0] FirstC; logic Firstun; logic WZero; @@ -71,7 +71,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc, .n, .m, .OTFCSwap, .ALTBM, .BZero, .As, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( @@ -81,7 +81,7 @@ module fdivsqrt( .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, - .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), + .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 72cde3943..2948713b0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -40,42 +40,34 @@ module fdivsqrtiter( // input logic SqrtM, input logic OTFCSwap, input logic [`DIVb+3:0] X, - input logic [`DIVN-2:0] Dpreproc, - output logic [`DIVN-2:0] D, // U0.N-1 - output logic [`DIVb:0] FirstU, FirstUM, + input logic [`DIVb-1:0] DPreproc, + output logic [`DIVb-1:0] D, + output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, - output logic [`DIVb+3:0] FirstWS, FirstWC + output logic [`DIVb+3:0] FirstWS, FirstWC ); -//QLEN = 1.(number of bits created for division) -// N is NF+1 or XLEN -// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift -// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0 -// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0 -// U/UM should be 1.b so b+1 bits or b:0 -// C needs to be the lenght of the final fraction 0.b so b or b-1:0 - /* verilator lint_off UNOPTFLAT */ - logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b - logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b - logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b - logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b - logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b - logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b - logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b - logic [`DIVb+1:0] initC; // Q2.b + /* verilator lint_off UNOPTFLAT */ + logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b + logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b + logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b + logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b + logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b + logic [`DIVb+1:0] initC; // Q2.b logic [`DIVCOPIES-1:0] un; - /* verilator lint_on UNOPTFLAT */ - logic [`DIVb+3:0] WSN, WCN; // Q4.N-1 - logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1 - logic [`DIVb+1:0] NextC; - logic [`DIVb+1:0] CMux; - logic [`DIVb:0] UMux, UMMux; - logic [`DIVb:0] initU, initUM; - + logic [`DIVb+3:0] WSN, WCN; // Q4.b + logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b + logic [`DIVb+1:0] NextC; + logic [`DIVb+1:0] CMux; + logic [`DIVb:0] UMux, UMMux; + logic [`DIVb:0] initU, initUM; + /* verilator lint_on UNOPTFLAT */ // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -85,15 +77,15 @@ module fdivsqrtiter( // Residual WS/SC registers/initializaiton mux mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN); mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN); - flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]); - flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]); + flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]); + flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0; assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; - mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); - mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); + mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); + mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]); flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]); @@ -103,18 +95,18 @@ module fdivsqrtiter( assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10; assign initC = {initCUpper, {`DIVb{1'b0}}}; mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux); - flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]); + flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]); // Divisior register - flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D); + flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); // Divisor Selections // - choose the negitive version of what's being selected - // - D is only the fraction - assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}}; + // - D is a 0.b mantissa + assign DBar = {3'b111, 1'b0, ~D}; if(`RADIX == 4) begin : d2 - assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}}; - assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}}; + assign DBar2 = {2'b11, 1'b0, ~D, 1'b1}; + assign D2 = {2'b0, 1'b1, D, 1'b0}; end // k=DIVCOPIES of the recurrence logic diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index f009cfd8b..7ac8229b9 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -32,8 +32,8 @@ module fdivsqrtpostproc( input logic [`DIVb+3:0] WS, WC, - input logic [`DIVN-2:0] D, // U0.N-1 - input logic [`DIVb:0] FirstU, FirstUM, + input logic [`DIVb-1:0] D, + input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, input logic Firstun, input logic SqrtM, @@ -41,12 +41,12 @@ module fdivsqrtpostproc( input logic [`XLEN-1:0] ForwardedSrcAE, input logic RemOpM, ALTBM, BZero, As, input logic [`DIVBLEN:0] n, m, - output logic [`DIVb:0] QmM, + output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM ); - logic [`DIVb+3:0] W, Sum, RemD; + logic [`DIVb+3:0] W, Sum, RemDM; logic [`DIVb:0] PreQmM; logic NegStickyM, PostIncM; logic weq0; @@ -78,14 +78,14 @@ module fdivsqrtpostproc( assign Sum = WC + WS; assign W = $signed(Sum) >>> `LOGR; assign NegStickyM = W[`DIVb+3]; - assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}}; + assign RemDM = {4'b0000, D}; // Integer division: sign handling for div and rem always_comb if (~As) if (NegStickyM) begin NormQuotM = FirstUM; - NormRemM = W + RemD; + NormRemM = W + RemDM; PostIncM = 0; end else begin NormQuotM = FirstU; @@ -99,7 +99,7 @@ module fdivsqrtpostproc( PostIncM = 0; end else begin NormQuotM = FirstU; - NormRemM = W - RemD; + NormRemM = W - RemDM; PostIncM = 1; end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b06780996..e74934e5e 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -45,22 +45,21 @@ module fdivsqrtpreproc ( output logic OTFCSwap, ALTBM, BZero, As, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, - output logic [`DIVN-2:0] Dpreproc + output logic [`DIVb-1:0] DPreproc ); - // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`NF-1:0] PreprocA, PreprocX; - logic [`NF-1:0] PreprocB, PreprocY; - logic [`NF+1:0] SqrtX; + + logic [`DIVb-1:0] XPreproc; + logic [`DIVb:0] SqrtX; logic [`DIVb+3:0] DivX; - logic [`NE+1:0] Qe; + logic [`NE+1:0] QeE; // Intdiv signals - logic [`DIVb-1:0] ZeroBufX, ZeroBufY; + logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`XLEN-1:0] PosA, PosB; - logic Bs, OTFCSwapTemp, ALTBE; + logic Bs, CalcOTFCSwap, ALTBE; logic [`XLEN-1:0] A64, B64; logic [`DIVBLEN:0] Calcn, Calcm; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; - logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L; + logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; logic [`LOGRK-1:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; @@ -72,39 +71,38 @@ module fdivsqrtpreproc ( assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; - assign OTFCSwapTemp = (As ^ Bs) & MDUE; + assign CalcOTFCSwap = (As ^ Bs) & MDUE; assign PosA = As ? -A64 : A64; assign PosB = Bs ? -B64 : B64; assign BZero = |ForwardedSrcBE; - assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}}; - assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}}; - lzc #(`DIVb) lzcX (ZeroBufX, L); - lzc #(`DIVb) lzcY (ZeroBufY, Calcm); + assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}}; + assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}}; + lzc #(`DIVb) lzcX (IFNormLenX, ell); + lzc #(`DIVb) lzcY (IFNormLenD, Calcm); - assign PreprocX = Xm[`NF-1:0]<> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)}; + assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; assign Calcn = (pPrCeil << `LOGK) - 1; assign IntBits = (`DIVBLEN)'(`RK) + p; assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; - assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; - assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; + assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF + assign DivX = {3'b000, ~XZero, XPreproc}; // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; - else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX; + else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; assign X = MDUE ? DivX >> RightShiftX : PreShiftX; - assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; // radix 2 radix 4 // 1 copies DIVLEN+2 DIVLEN+2/2 @@ -116,12 +114,12 @@ module fdivsqrtpreproc ( // r = 1 or 2 // DIVRESLEN/(r*`DIVCOPIES) - flopen #(`NE+2) expreg(clk, IFDivStartE, Qe, QeM); - flopen #(1) swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap); + flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n); flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m); - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe); + expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE)); endmodule @@ -130,7 +128,7 @@ module expcalc( input logic [`NE-1:0] Xe, Ye, input logic Sqrt, input logic XZero, - input logic [`DIVBLEN:0] L, m, + input logic [`DIVBLEN:0] ell, m, output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; @@ -162,10 +160,10 @@ module expcalc( 2'h2: Bias = (`NE-1)'(`H_BIAS); endcase end - assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS); + assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; // correct exponent for denormalized input's normalization shifts - assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; + assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; assign Qe = Sqrt ? SExp : DExp; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index b4c2527d3..088aff3a7 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -32,7 +32,7 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 ( - input logic [`DIVN-2:0] D, + input logic [`DIVb-1:0] D, input logic [`DIVb+3:0] DBar, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, @@ -69,7 +69,7 @@ module fdivsqrtstage2 ( always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; // qz - else Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un + else Dsel = {3'b000, 1'b1, D}; // un // Partial Product Generation // WSA, WCA = WS + WC - qD diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index fb203fd72..f006b0478 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fdivsqrtstage4 ( - input logic [`DIVN-2:0] D, + input logic [`DIVb-1:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, @@ -61,7 +61,7 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; - assign Dmsbs = D[`DIVN-2:`DIVN-4]; + assign Dmsbs = D[`DIVb-1:`DIVb-3]; assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[`DIVb+3:`DIVb-4]; @@ -77,7 +77,7 @@ module fdivsqrtstage4 ( 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; - 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; + 4'b0010: Dsel = {3'b0, 1'b1, D}; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 2d32ea394..9bd99aeef 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1413,6 +1413,7 @@ string imperas32f[] = '{ string arch32f[] = '{ `RISCVARCHTEST, + "rv32i_m/F/src/fdiv_b20-01.S", "rv32i_m/F/src/fadd_b10-01.S", "rv32i_m/F/src/fadd_b1-01.S", "rv32i_m/F/src/fadd_b11-01.S", From b69aa39f30548b1db9ae5941447103c559e1ef48 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 14 Dec 2022 09:34:29 -0600 Subject: [PATCH 2/5] Reduced complexity of linebytemask. --- pipelined/src/cache/cache.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 6c7aa8994..6145749e8 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -163,7 +163,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE end assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. - assign LineByteMask = ~SetValid & ~SetDirty ? '0 : ~SetValid & SetDirty ? DemuxedByteMask : '1; // if store hit only enable the word and subword bytes, else write all bytes. + logic [LINELEN/8-1:0] LineByteMask2; + assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), From 4a0e4aed99a7b5c0a2c47b6e39867f291b999bb1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 14 Dec 2022 09:49:15 -0600 Subject: [PATCH 3/5] Signal renames to reflect figures. --- pipelined/src/cache/cache.sv | 16 +++++++-------- pipelined/src/cache/cacheLRU.sv | 4 ++-- pipelined/src/cache/cachefsm.sv | 8 ++++---- pipelined/src/cache/cacheway.sv | 35 ++++++++++++++++----------------- 4 files changed, 31 insertions(+), 32 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 6145749e8..4e34eb02b 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -94,14 +94,14 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE logic [NUMWAYS-1:0] NextFlushWay; logic FlushWayCntEn; logic FlushWayCntRst; - logic SelEvict; + logic SelWriteback; logic LRUWriteEn; logic SelFlush; logic ResetOrFlushAdr, ResetOrFlushWay; logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic SelFetchBuffer; - logic ce; + logic CacheEn; localparam LOGLLENBYTES = $clog2(WORDLEN/8); localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; @@ -124,12 +124,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) - CacheWays[NUMWAYS-1:0](.clk, .reset, .ce, .CAdr, .PAdr, .LineWriteData, .LineByteMask, - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .VictimWay, + CacheWays[NUMWAYS-1:0](.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask, + .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); if(NUMWAYS > 1) begin:vict cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU( - .clk, .reset, .ce, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), + .clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage), .SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache); end else assign VictimWay = 1'b1; // one hot. assign CacheHit = | HitWay; @@ -174,7 +174,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), - .s({SelFlush, SelEvict}), .y(CacheBusAdr)); + .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); ///////////////////////////////////////////////////////////////////////////////////////////// // Flush address and way generation during flush @@ -199,10 +199,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .ClearValid, .ClearDirty, .SetDirty, - .SetValid, .SelEvict, .SelFlush, + .SetValid, .SelWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, - .ce, + .CacheEn, .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cacheLRU.sv b/pipelined/src/cache/cacheLRU.sv index f6d5adf57..fe22ec2c8 100644 --- a/pipelined/src/cache/cacheLRU.sv +++ b/pipelined/src/cache/cacheLRU.sv @@ -32,7 +32,7 @@ module cacheLRU #(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128)( - input logic clk, reset, ce, FlushStage, + input logic clk, reset, CacheEn, FlushStage, input logic [NUMWAYS-1:0] HitWay, input logic [NUMWAYS-1:0] ValidWay, output logic [NUMWAYS-1:0] VictimWay, @@ -120,7 +120,7 @@ module cacheLRU // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice. always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; - if(ce) begin + if(CacheEn) begin if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; else if (LRUWriteEn & ~FlushStage) begin LRUMemory[CAdr] <= NextLRU; ///***** RT: This is not right. Logically should be PAdr, but it breaks linux. diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 860432c82..1396adf43 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -64,7 +64,7 @@ module cachefsm output logic ClearDirty, output logic SetDirty, output logic SetValid, - output logic SelEvict, + output logic SelWriteback, output logic LRUWriteEn, output logic SelFlush, output logic FlushAdrCntEn, @@ -72,7 +72,7 @@ module cachefsm output logic FlushAdrCntRst, output logic FlushWayCntRst, output logic SelFetchBuffer, - output logic ce); + output logic CacheEn); logic resetDelay; logic AMO, StoreAMO; @@ -170,7 +170,7 @@ module cachefsm assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | + assign SelWriteback = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK); @@ -201,6 +201,6 @@ module cachefsm resetDelay; assign SelFetchBuffer = CurrState == STATE_MISS_WRITE_CACHE_LINE | CurrState == STATE_MISS_READ_DELAY; - assign ce = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset; + assign CacheEn = (CurrState == STATE_READY & ~Stall | CacheStall) | (CurrState != STATE_READY) | reset; endmodule // cachefsm diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 1fcc2566f..5f9869547 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -33,7 +33,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) ( input logic clk, - input logic ce, + input logic CacheEn, input logic reset, input logic [$clog2(NUMLINES)-1:0] CAdr, input logic [`PA_BITS-1:0] PAdr, @@ -42,7 +42,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic ClearValid, input logic SetDirty, input logic ClearDirty, - input logic SelEvict, + input logic SelWriteback, input logic SelFlush, input logic VictimWay, input logic FlushWay, @@ -76,8 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic ClearValidWay; logic SetDirtyWay; logic ClearDirtyWay; - logic SelectedWay; - logic SelWriteback; + logic SelNonHit; logic SelData; logic FlushWayEn, VictimWayEn; @@ -85,28 +84,28 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // FlushWay and VictimWay are part of a one hot way selection. Must clear them if FlushWay not selected // or VictimWay not selected. assign FlushWayEn = FlushWay & SelFlush; - assign VictimWayEn = VictimWay & SelEvict; + assign VictimWayEn = VictimWay & SelWriteback; - assign SelWriteback = FlushWayEn | SetValid | SelEvict; + assign SelNonHit = FlushWayEn | SetValid | SelWriteback; mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); //assign SelTag = VictimWay | FlushWay; - assign SelData = HitWay | FlushWayEn | VictimWayEn; + //assign SelData = HitWay | FlushWayEn | VictimWayEn; - mux2 #(1) selectedwaymux(HitWay, SelTag, SelWriteback , SelectedWay); + mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// // RT: Can we merge these two muxes? This is also shared in cacheLRU. - //mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelectedWay); - //mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelEvict}, SelData); + //mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelData); + //mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData); - assign SetValidWay = SetValid & SelectedWay; - assign ClearValidWay = ClearValid & SelectedWay; - assign SetDirtyWay = SetDirty & SelectedWay; - assign ClearDirtyWay = ClearDirty & SelectedWay; + assign SetValidWay = SetValid & SelData; + assign ClearValidWay = ClearValid & SelData; + assign SetDirtyWay = SetDirty & SelData; + assign ClearDirtyWay = ClearDirty & SelData; // If writing the whole line set all write enables to 1, else only set the correct word. assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; @@ -117,7 +116,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Tag Array ///////////////////////////////////////////////////////////////////////////////////////////// - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce, + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn), .addr(CAdr), .dout(ReadTag), .bwe('1), .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); @@ -140,7 +139,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, localparam integer LOGNUMSRAM = $clog2(NUMSRAM); for(words = 0; words < NUMSRAM; words++) begin: word - sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce, .addr(CAdr), + sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr), .dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]), .din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]), .we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words])); @@ -155,7 +154,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset) ValidBits <= #1 '0; - if(ce) begin + if(CacheEn) begin ValidWay <= #1 ValidBits[CAdr]; if(InvalidateCache & ~FlushStage) ValidBits <= #1 '0; else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay; @@ -171,7 +170,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // reset is optional. Consider merging with TAG array in the future. //if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; - if(ce) begin + if(CacheEn) begin Dirty <= #1 DirtyBits[CAdr]; if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay; end From 643a2e7cf9980aa51ff1f3b1053cedb3db80c6fc Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 14 Dec 2022 17:03:13 -0800 Subject: [PATCH 4/5] Use FPU divider for integer division when F is supported --- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 5 +++-- pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 10 ++++++---- pipelined/src/fpu/fpu.sv | 9 ++++++--- pipelined/src/ieu/controller.sv | 16 +++++++++------- pipelined/src/ieu/datapath.sv | 11 +++++++---- pipelined/src/ieu/ieu.sv | 8 +++++--- pipelined/src/muldiv/muldiv.sv | 15 +++++++++++---- pipelined/src/wally/wallypipelinedcore.sv | 6 ++++-- tests/riscof/Makefile | 11 ++--------- 9 files changed, 53 insertions(+), 38 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index bdbfbde00..2c1aa7ed3 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -52,7 +52,8 @@ module fdivsqrt( output logic FDivBusyE, IFDivStartE, FDivDoneE, // output logic DivDone, output logic [`NE+1:0] QeM, - output logic [`DIVb:0] QmM + output logic [`DIVb:0] QmM, + output logic [`XLEN-1:0] FPIntDivResultM // output logic [`XLEN-1:0] RemM, ); @@ -88,5 +89,5 @@ module fdivsqrt( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAE, .n, .ALTBM, .m, .BZero, .As, - .QmM, .WZero, .DivSM); + .QmM, .WZero, .DivSM, .FPIntDivResultM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 7ac8229b9..925c12892 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -43,7 +43,8 @@ module fdivsqrtpostproc( input logic [`DIVBLEN:0] n, m, output logic [`DIVb:0] QmM, output logic WZero, - output logic DivSM + output logic DivSM, + output logic [`XLEN-1:0] FPIntDivResultM ); logic [`DIVb+3:0] W, Sum, RemDM; @@ -53,7 +54,7 @@ module fdivsqrtpostproc( logic [`DIVBLEN:0] NormShiftM; logic [`DIVb:0] IntQuotM, NormQuotM; logic [`DIVb+3:0] IntRemM, NormRemM; - logic [`DIVb+3:0] PreResultM, ResultM; + logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM; // check for early termination on an exact result. If the result is not exact, the sticky should be set aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0); @@ -136,8 +137,9 @@ module fdivsqrtpostproc( // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted - assign ResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)}; - + assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)}; + assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0]; + assign PreQmM = NegStickyM ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit assign QmM = SqrtM ? (PreQmM << 1) : PreQmM; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index bd4053dcb..10fa4d70e 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -55,7 +55,8 @@ module fpu ( output logic FCvtIntW, // select FCvtIntRes (to IEU) output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit) - output logic [4:0] SetFflagsM // FPU flags (to privileged unit) + output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) + output logic [`XLEN-1:0] FPIntDivResultW ); // FPU specifics: @@ -152,6 +153,7 @@ module fpu ( logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed logic StallUnpackedM; + logic [`XLEN-1:0] FPIntDivResultM; // DECODE STAGE @@ -267,7 +269,7 @@ module fpu ( .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E, .StallE, .StallM, .TrapM, .DivSM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, - .QmM /*, .DivDone(DivDoneM) */); + .QmM, .FPIntDivResultM /*, .DivDone(DivDoneM) */); // // compare @@ -387,7 +389,8 @@ module fpu ( // M/W pipe registers flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); - flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); + flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); + flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FPIntDivResultM, FPIntDivResultW); // BEGIN WRITEBACK STAGE diff --git a/pipelined/src/ieu/controller.sv b/pipelined/src/ieu/controller.sv index ab623b0e5..bf9482c80 100644 --- a/pipelined/src/ieu/controller.sv +++ b/pipelined/src/ieu/controller.sv @@ -65,7 +65,7 @@ module controller( output logic FWriteIntM, // Writeback stage control signals input logic StallW, FlushW, - output logic RegWriteW, // for datapath and Hazard Unit + output logic RegWriteW, DivW, // for datapath and Hazard Unit output logic [2:0] ResultSrcW, // Stall during CSRs output logic CSRWriteFencePendingDEM, @@ -109,6 +109,7 @@ module controller( logic IllegalERegAdrD; logic [1:0] AtomicE; logic FencePendingD, FencePendingE, FencePendingM; + logic DivE, DivM; // Extract fields @@ -222,16 +223,17 @@ module controller( assign MemReadE = MemRWE[1]; assign SCE = (ResultSrcE == 3'b100); assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers + assign DivE = MDUE & Funct3E[2]; // Division operation // Memory stage pipeline control register - flopenrc #(19) controlregM(clk, reset, FlushM, ~StallM, - {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE}, - {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM}); + flopenrc #(20) controlregM(clk, reset, FlushM, ~StallM, + {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE, DivE}, + {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, FencePendingM, InstrValidM, DivM}); // Writeback stage pipeline control register - flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW, - {RegWriteM, ResultSrcM}, - {RegWriteW, ResultSrcW}); + flopenrc #(5) controlregW(clk, reset, FlushW, ~StallW, + {RegWriteM, ResultSrcM, DivM}, + {RegWriteW, ResultSrcW, DivW}); // Stall pipeline at Fetch if a CSR Write or Fence is pending in the subsequent stages assign CSRWriteFencePendingDEM = CSRWriteD | CSRWriteE | CSRWriteM | FencePendingD | FencePendingE | FencePendingM; diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index 89ebd9b5d..abc232651 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -57,14 +57,15 @@ module datapath ( output logic [`XLEN-1:0] WriteDataM, // Writeback stage signals input logic StallW, FlushW, -(* mark_debug = "true" *) input logic RegWriteW, +(* mark_debug = "true" *) input logic RegWriteW, DivW, input logic SquashSCW, input logic [2:0] ResultSrcW, input logic [`XLEN-1:0] FCvtIntResW, input logic [`XLEN-1:0] ReadDataW, // input logic [`XLEN-1:0] PCLinkW, input logic [`XLEN-1:0] CSRReadValW, MDUResultW, - // Hazard Unit signals + input logic [`XLEN-1:0] FPIntDivResultW, + // Hazard Unit signals output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, output logic [4:0] RdE, RdM, RdW ); @@ -85,7 +86,7 @@ module datapath ( // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ResultW; - logic [`XLEN-1:0] IFResultW, IFCvtResultW; + logic [`XLEN-1:0] IFResultW, IFCvtResultW, MulDivResultW; // Decode stage assign Rs1D = InstrD[19:15]; @@ -125,10 +126,12 @@ module datapath ( if (`F_SUPPORTED) begin:fpmux mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); + mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW); end else begin:fpmux assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW; + assign MulDivResultW = MDUResultW; end - mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); // handle Store Conditional result if atomic extension supported if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; diff --git a/pipelined/src/ieu/ieu.sv b/pipelined/src/ieu/ieu.sv index 014d24f67..40d91a409 100644 --- a/pipelined/src/ieu/ieu.sv +++ b/pipelined/src/ieu/ieu.sv @@ -58,6 +58,7 @@ module ieu ( output logic InvalidateICacheM, FlushDCacheM, // Writeback stage + input logic [`XLEN-1:0] FPIntDivResultW, input logic [`XLEN-1:0] CSRReadValW, MDUResultW, input logic [`XLEN-1:0] FCvtIntResW, output logic [4:0] RdW, @@ -83,6 +84,7 @@ module ieu ( logic SCE; logic [4:0] RdE; logic FWriteIntM; + logic DivW; // forwarding signals logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; @@ -99,15 +101,15 @@ module ieu ( .Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM, .CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M, .RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM, - .StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD); + .StallW, .FlushW, .RegWriteW, .DivW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD); datapath dp( .clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE, .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, .FCvtIntW, - .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, - .CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); + .StallW, .FlushW, .RegWriteW, .DivW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW, + .CSRReadValW, .MDUResultW, .FPIntDivResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW); forward fw( .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/muldiv/muldiv.sv index 5fa717e5f..c41ec4592 100644 --- a/pipelined/src/muldiv/muldiv.sv +++ b/pipelined/src/muldiv/muldiv.sv @@ -59,10 +59,17 @@ module muldiv ( // Divide // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign DivE = MDUE & Funct3E[2]; - assign DivSignedE = ~Funct3E[0]; - intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE, - .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); + // When F extensions are supported, use the FPU divider instead + if (`F_SUPPORTED) begin + assign QuotM = 0; + assign RemM = 0; + assign DivBusyE = 0; + end else begin + assign DivE = MDUE & Funct3E[2]; + assign DivSignedE = ~Funct3E[0]; + intdivrestoring div(.clk, .reset, .StallM, .TrapM, .DivSignedE, .W64E, .DivE, + .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); + end // Result multiplexer always_comb diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 076088857..a34a088e6 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -99,6 +99,7 @@ module wallypipelinedcore ( logic FpLoadStoreM; logic [1:0] FResSelW; logic [4:0] SetFflagsM; + logic [`XLEN-1:0] FPIntDivResultW; // memory management unit signals logic ITLBWriteF; @@ -228,7 +229,7 @@ module wallypipelinedcore ( .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, // Writeback stage - .CSRReadValW, .MDUResultW, + .CSRReadValW, .MDUResultW, .FPIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]), .InstrValidM, .FCvtIntResW, @@ -405,7 +406,8 @@ module wallypipelinedcore ( .FCvtIntW, // fpu result selection .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) .IllegalFPUInstrM, // Is the instruction an illegal fpu instruction - .SetFflagsM // FPU flags (to privileged unit) + .SetFflagsM, // FPU flags (to privileged unit) + .FPIntDivResultW ); // floating point unit end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low assign FStallD = 0; diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index aba3983c3..8292b9888 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,8 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) #XLEN ?= 64 -all: root fsd_fld_tempfix arch32 wally32 wally32e arch64 wally64 -#all: root fsd_fld_tempfix wally32 +all: root arch32 wally32 wally32e arch64 wally64 root: mkdir -p $(work_dir) @@ -20,14 +19,8 @@ root: sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini sed 's,{0},$(current_dir),g;s,{1},32e,g' config.ini > config32e.ini -fsd_fld_tempfix: - # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests - # https://github.com/riscv-non-isa/riscv-arch-test/issues/266 - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} - arch32: - riscof --verbose debug run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser + riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv32i_m/ || echo "error suppressed" arch64: From e80e84aacedb7c05d2c59858e32b3198b02b0298 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 15 Dec 2022 06:37:55 -0800 Subject: [PATCH 5/5] Added IDIV_ON_FPU flag to control whether integer division uses FPU --- pipelined/config/buildroot/wally-config.vh | 1 + pipelined/config/fpga/wally-config.vh | 1 + pipelined/config/rv32e/wally-config.vh | 1 + pipelined/config/rv32gc/wally-config.vh | 1 + pipelined/config/rv32i/wally-config.vh | 1 + pipelined/config/rv32ic/wally-config.vh | 1 + pipelined/config/rv64BP/wally-config.vh | 1 + pipelined/config/rv64fpquad/wally-config.vh | 1 + pipelined/config/rv64gc/wally-config.vh | 1 + pipelined/config/rv64i/wally-config.vh | 1 + pipelined/src/ieu/datapath.sv | 6 +++++- pipelined/src/muldiv/muldiv.sv | 2 +- 12 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index defbf458d..6b5490354 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -72,6 +72,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 16 diff --git a/pipelined/config/fpga/wally-config.vh b/pipelined/config/fpga/wally-config.vh index 8d9ff7e15..97b43c5b4 100644 --- a/pipelined/config/fpga/wally-config.vh +++ b/pipelined/config/fpga/wally-config.vh @@ -74,6 +74,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 8b6dea7e7..99f61c12a 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -73,6 +73,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 1 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 69b256ad1..bafe35e6d 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -72,6 +72,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index 4eaa116a0..46e493cbf 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -73,6 +73,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index ba47915de..d6ca045ea 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -72,6 +72,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 8591ab1c9..85456e2c9 100644 --- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -76,6 +76,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Address space `define RESET_VECTOR 64'h0000000000001000 diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh index 13df811c4..eeba56990 100644 --- a/pipelined/config/rv64fpquad/wally-config.vh +++ b/pipelined/config/rv64fpquad/wally-config.vh @@ -74,6 +74,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 630dfdb6f..a469a2552 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -74,6 +74,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 64 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 098755cd2..5c34b96a1 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -74,6 +74,7 @@ // Integer Divider Configuration // DIV_BITSPERCYCLE must be 1, 2, or 4 `define DIV_BITSPERCYCLE 4 +`define IDIV_ON_FPU 0 // Legal number of PMP entries are 0, 16, or 64 `define PMP_ENTRIES 0 diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index abc232651..7c05986d5 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -126,7 +126,11 @@ module datapath ( if (`F_SUPPORTED) begin:fpmux mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); - mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW); + if (`IDIV_ON_FPU) begin + mux2 #(`XLEN) divresultmuxW(MDUResultW, FPIntDivResultW, DivW, MulDivResultW); + end else begin + assign MulDivResultW = MDUResultW; + end end else begin:fpmux assign IFResultM = IEUResultM; assign IFCvtResultW = IFResultW; assign MulDivResultW = MDUResultW; diff --git a/pipelined/src/muldiv/muldiv.sv b/pipelined/src/muldiv/muldiv.sv index c41ec4592..d6bc26de4 100644 --- a/pipelined/src/muldiv/muldiv.sv +++ b/pipelined/src/muldiv/muldiv.sv @@ -60,7 +60,7 @@ module muldiv ( // Divide // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When F extensions are supported, use the FPU divider instead - if (`F_SUPPORTED) begin + if (`IDIV_ON_FPU) begin assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0;