diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 63ffab71..bdbfbde0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -56,11 +56,11 @@ module fdivsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVb+3:0] WS, WC; + logic [`DIVb+3:0] WS, WC; logic [`DIVb+3:0] X; - logic [`DIVN-2:0] D; // U0.N-1 - logic [`DIVN-2:0] Dpreproc; - logic [`DIVb:0] FirstU, FirstUM; + logic [`DIVb-1:0] D; + logic [`DIVb-1:0] DPreproc; + logic [`DIVb:0] FirstU, FirstUM; logic [`DIVb+1:0] FirstC; logic Firstun; logic WZero; @@ -71,7 +71,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc, .n, .m, .OTFCSwap, .ALTBM, .BZero, .As, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( @@ -81,7 +81,7 @@ module fdivsqrt( .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, - .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), + .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 72cde394..2948713b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -40,42 +40,34 @@ module fdivsqrtiter( // input logic SqrtM, input logic OTFCSwap, input logic [`DIVb+3:0] X, - input logic [`DIVN-2:0] Dpreproc, - output logic [`DIVN-2:0] D, // U0.N-1 - output logic [`DIVb:0] FirstU, FirstUM, + input logic [`DIVb-1:0] DPreproc, + output logic [`DIVb-1:0] D, + output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, - output logic [`DIVb+3:0] FirstWS, FirstWC + output logic [`DIVb+3:0] FirstWS, FirstWC ); -//QLEN = 1.(number of bits created for division) -// N is NF+1 or XLEN -// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift -// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0 -// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0 -// U/UM should be 1.b so b+1 bits or b:0 -// C needs to be the lenght of the final fraction 0.b so b or b-1:0 - /* verilator lint_off UNOPTFLAT */ - logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b - logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b - logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b - logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b - logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b - logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b - logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b - logic [`DIVb+1:0] initC; // Q2.b + /* verilator lint_off UNOPTFLAT */ + logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b + logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b + logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b + logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b + logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b + logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b + logic [`DIVb+1:0] initC; // Q2.b logic [`DIVCOPIES-1:0] un; - /* verilator lint_on UNOPTFLAT */ - logic [`DIVb+3:0] WSN, WCN; // Q4.N-1 - logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1 - logic [`DIVb+1:0] NextC; - logic [`DIVb+1:0] CMux; - logic [`DIVb:0] UMux, UMMux; - logic [`DIVb:0] initU, initUM; - + logic [`DIVb+3:0] WSN, WCN; // Q4.b + logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b + logic [`DIVb+1:0] NextC; + logic [`DIVb+1:0] CMux; + logic [`DIVb:0] UMux, UMMux; + logic [`DIVb:0] initU, initUM; + /* verilator lint_on UNOPTFLAT */ // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -85,15 +77,15 @@ module fdivsqrtiter( // Residual WS/SC registers/initializaiton mux mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN); mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN); - flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]); - flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]); + flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]); + flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0; assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; - mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); - mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); + mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); + mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]); flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]); @@ -103,18 +95,18 @@ module fdivsqrtiter( assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10; assign initC = {initCUpper, {`DIVb{1'b0}}}; mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux); - flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]); + flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]); // Divisior register - flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D); + flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); // Divisor Selections // - choose the negitive version of what's being selected - // - D is only the fraction - assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}}; + // - D is a 0.b mantissa + assign DBar = {3'b111, 1'b0, ~D}; if(`RADIX == 4) begin : d2 - assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}}; - assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}}; + assign DBar2 = {2'b11, 1'b0, ~D, 1'b1}; + assign D2 = {2'b0, 1'b1, D, 1'b0}; end // k=DIVCOPIES of the recurrence logic diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index f009cfd8..7ac8229b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -32,8 +32,8 @@ module fdivsqrtpostproc( input logic [`DIVb+3:0] WS, WC, - input logic [`DIVN-2:0] D, // U0.N-1 - input logic [`DIVb:0] FirstU, FirstUM, + input logic [`DIVb-1:0] D, + input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, input logic Firstun, input logic SqrtM, @@ -41,12 +41,12 @@ module fdivsqrtpostproc( input logic [`XLEN-1:0] ForwardedSrcAE, input logic RemOpM, ALTBM, BZero, As, input logic [`DIVBLEN:0] n, m, - output logic [`DIVb:0] QmM, + output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM ); - logic [`DIVb+3:0] W, Sum, RemD; + logic [`DIVb+3:0] W, Sum, RemDM; logic [`DIVb:0] PreQmM; logic NegStickyM, PostIncM; logic weq0; @@ -78,14 +78,14 @@ module fdivsqrtpostproc( assign Sum = WC + WS; assign W = $signed(Sum) >>> `LOGR; assign NegStickyM = W[`DIVb+3]; - assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}}; + assign RemDM = {4'b0000, D}; // Integer division: sign handling for div and rem always_comb if (~As) if (NegStickyM) begin NormQuotM = FirstUM; - NormRemM = W + RemD; + NormRemM = W + RemDM; PostIncM = 0; end else begin NormQuotM = FirstU; @@ -99,7 +99,7 @@ module fdivsqrtpostproc( PostIncM = 0; end else begin NormQuotM = FirstU; - NormRemM = W - RemD; + NormRemM = W - RemDM; PostIncM = 1; end diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b0678099..e74934e5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -45,22 +45,21 @@ module fdivsqrtpreproc ( output logic OTFCSwap, ALTBM, BZero, As, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, - output logic [`DIVN-2:0] Dpreproc + output logic [`DIVb-1:0] DPreproc ); - // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`NF-1:0] PreprocA, PreprocX; - logic [`NF-1:0] PreprocB, PreprocY; - logic [`NF+1:0] SqrtX; + + logic [`DIVb-1:0] XPreproc; + logic [`DIVb:0] SqrtX; logic [`DIVb+3:0] DivX; - logic [`NE+1:0] Qe; + logic [`NE+1:0] QeE; // Intdiv signals - logic [`DIVb-1:0] ZeroBufX, ZeroBufY; + logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`XLEN-1:0] PosA, PosB; - logic Bs, OTFCSwapTemp, ALTBE; + logic Bs, CalcOTFCSwap, ALTBE; logic [`XLEN-1:0] A64, B64; logic [`DIVBLEN:0] Calcn, Calcm; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; - logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L; + logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; logic [`LOGRK-1:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; @@ -72,39 +71,38 @@ module fdivsqrtpreproc ( assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE; assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE; - assign OTFCSwapTemp = (As ^ Bs) & MDUE; + assign CalcOTFCSwap = (As ^ Bs) & MDUE; assign PosA = As ? -A64 : A64; assign PosB = Bs ? -B64 : B64; assign BZero = |ForwardedSrcBE; - assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}}; - assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}}; - lzc #(`DIVb) lzcX (ZeroBufX, L); - lzc #(`DIVb) lzcY (ZeroBufY, Calcm); + assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}}; + assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}}; + lzc #(`DIVb) lzcX (IFNormLenX, ell); + lzc #(`DIVb) lzcY (IFNormLenD, Calcm); - assign PreprocX = Xm[`NF-1:0]<> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)}; + assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; assign Calcn = (pPrCeil << `LOGK) - 1; assign IntBits = (`DIVBLEN)'(`RK) + p; assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; - assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; - assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; + assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF + assign DivX = {3'b000, ~XZero, XPreproc}; // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX; - else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX; + if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX; + else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; assign X = MDUE ? DivX >> RightShiftX : PreShiftX; - assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; // radix 2 radix 4 // 1 copies DIVLEN+2 DIVLEN+2/2 @@ -116,12 +114,12 @@ module fdivsqrtpreproc ( // r = 1 or 2 // DIVRESLEN/(r*`DIVCOPIES) - flopen #(`NE+2) expreg(clk, IFDivStartE, Qe, QeM); - flopen #(1) swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap); + flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n); flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m); - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe); + expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE)); endmodule @@ -130,7 +128,7 @@ module expcalc( input logic [`NE-1:0] Xe, Ye, input logic Sqrt, input logic XZero, - input logic [`DIVBLEN:0] L, m, + input logic [`DIVBLEN:0] ell, m, output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; @@ -162,10 +160,10 @@ module expcalc( 2'h2: Bias = (`NE-1)'(`H_BIAS); endcase end - assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS); + assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; // correct exponent for denormalized input's normalization shifts - assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; + assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; assign Qe = Sqrt ? SExp : DExp; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index b4c2527d..088aff3a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -32,7 +32,7 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 ( - input logic [`DIVN-2:0] D, + input logic [`DIVb-1:0] D, input logic [`DIVb+3:0] DBar, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, @@ -69,7 +69,7 @@ module fdivsqrtstage2 ( always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; // qz - else Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un + else Dsel = {3'b000, 1'b1, D}; // un // Partial Product Generation // WSA, WCA = WS + WC - qD diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index fb203fd7..f006b047 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module fdivsqrtstage4 ( - input logic [`DIVN-2:0] D, + input logic [`DIVb-1:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, @@ -61,7 +61,7 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; - assign Dmsbs = D[`DIVN-2:`DIVN-4]; + assign Dmsbs = D[`DIVb-1:`DIVb-3]; assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[`DIVb+3:`DIVb-4]; @@ -77,7 +77,7 @@ module fdivsqrtstage4 ( 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; - 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; + 4'b0010: Dsel = {3'b0, 1'b1, D}; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 2d32ea39..9bd99aee 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1413,6 +1413,7 @@ string imperas32f[] = '{ string arch32f[] = '{ `RISCVARCHTEST, + "rv32i_m/F/src/fdiv_b20-01.S", "rv32i_m/F/src/fadd_b10-01.S", "rv32i_m/F/src/fadd_b1-01.S", "rv32i_m/F/src/fadd_b11-01.S",