From fedf9c8a5ab2bdb708e01a6d1bece387d0cf8572 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 08:46:55 -0800 Subject: [PATCH 01/11] Started cleaning up shifting leading 1 in fdivsqrt --- config/shared/config-shared.vh | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 48f02b848..acc7996cb 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -94,15 +94,15 @@ localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); // division constants -localparam DIVN = (((NF+2 Date: Fri, 10 Nov 2023 09:11:15 -0800 Subject: [PATCH 02/11] fdivsqrt parameter cleanup --- config/shared/config-shared.vh | 13 ++++++------- config/shared/parameter-defs.vh | 3 +-- src/cvw.sv | 1 - src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 3 ++- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index acc7996cb..17b1ede83 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -94,15 +94,14 @@ localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); // division constants -localparam DIVN = ((NF+2>> NormShiftM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 8f3c477c4..0e716ac20 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -48,7 +48,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] AM ); - logic [P.DIVb-1:0] Xfract, Dfract; + logic [P.DIVb:0] Xfract, Dfract; logic [P.DIVb:0] PreSqrtX; logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) @@ -103,12 +103,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // count leading zeros for Subnorm FP and to normalize integer inputs - lzc #(P.DIVb) lzcX (IFX[P.DIVb:1], ell); - lzc #(P.DIVb) lzcY (IFD[P.DIVb:1], mE); + lzc #(P.DIVb+1) lzcX (IFX, ell); + lzc #(P.DIVb+1) lzcY (IFD, mE); // Normalization shift: shift off leading one - assign Xfract = (IFX[P.DIVb:1] << ell) << 1; - assign Dfract = (IFD[P.DIVb:1] << mE) << 1; + assign Xfract = (IFX << ell); + assign Dfract = (IFD << mE); ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary @@ -158,10 +158,10 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( // it comes out in the wash and gives the right answer. Investigate later if possible. ////////////////////////////////////////////////////// - assign DivX = {3'b000, ~NumerZeroE, Xfract}; + assign DivX = {3'b000, Xfract}; // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + mux2 #(P.DIVb+1) sqrtxmux(Xfract, {1'b0, Xfract[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -176,8 +176,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( assign X = PreShiftX; end - // Divisior register - flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); + // Divisior register + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dfract}, D); // Floating-point exponent fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From 4d77f28a1947e6c295ca9900fe5768aff3c0f47a Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 11:21:02 -0800 Subject: [PATCH 03/11] Divsqrt cleanup: change Q to U, commenting code --- src/fpu/fdivsqrt/fdivsqrt.sv | 8 +++---- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtexpcalc.sv | 11 ++++++--- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 12 +++++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 36 +++++++++++++++++----------- src/fpu/fpu.sv | 12 +++++----- src/fpu/postproc/divshiftcalc.sv | 28 +++++++++++----------- src/fpu/postproc/postprocess.sv | 12 +++++----- src/fpu/postproc/round.sv | 6 ++--- src/fpu/postproc/shiftcorrection.sv | 8 +++---- 10 files changed, 74 insertions(+), 61 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 5c5fa0f57..60e42f457 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic DivStickyM, output logic FDivBusyE, IFDivStartE, FDivDoneE, - output logic [P.NE+1:0] QeM, - output logic [P.DIVb:0] QmM, + output logic [P.NE+1:0] UeM, // Exponent result + output logic [P.DIVb:0] UmM, // Significand result output logic [P.XLEN-1:0] FIntDivResultM ); @@ -74,7 +74,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -94,7 +94,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, - .QmM, .WZeroE, .DivStickyM, + .UmM, .WZeroE, .DivStickyM, // Int-specific .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index ed28c9355..2122317fe 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -68,7 +68,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits -1)/(P.RK) + 1; else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index 5531276df..113f2b2dd 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -32,8 +32,9 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( input logic Sqrt, input logic XZero, input logic [P.DIVBLEN:0] ell, m, - output logic [P.NE+1:0] Qe + output logic [P.NE+1:0] Ue ); + logic [P.NE-2:0] Bias; logic [P.NE+1:0] SXExp; logic [P.NE+1:0] SExp; @@ -63,10 +64,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( 2'h2: Bias = (P.NE-1)'(P.H_BIAS); endcase end + + // Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS); assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias}; - // correct exponent for subnormal input's normalization shifts + // division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); - assign Qe = Sqrt ? SExp : DExp; + + // Select square root or division exponent + assign Ue = Sqrt ? SExp : DExp; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 9f887d4ab..2b9be54a7 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -38,14 +38,14 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, input logic [P.DIVBLEN:0] nM, mM, - output logic [P.DIVb:0] QmM, + output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, output logic [P.XLEN-1:0] FIntDivResultM ); logic [P.DIVb+3:0] W, Sum; - logic [P.DIVb:0] PreQmM; + logic [P.DIVb:0] PreUmM; logic NegStickyM; logic weq0E, WZeroM; logic [P.XLEN-1:0] IntDivResultM; @@ -91,17 +91,17 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. assign Sum = WC + WS; assign NegStickyM = Sum[P.DIVb+3]; - mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit - mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM); + mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit + mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM); - // Integer quotient or remainder correctoin, normalization, and special cases + // Integer quotient or remainder correction, normalization, and special cases if (P.IDIV_ON_FPU) begin:intpostproc // Int supported logic [P.DIVBLEN:0] NormShiftM; logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; logic signed [P.DIVb+3:0] PreResultM, PreIntResultM; assign W = $signed(Sum) >>> P.LOGR; - assign UnsignedQuotM = {3'b000, PreQmM}; + assign UnsignedQuotM = {3'b000, PreUmM}; // Integer remainder: sticky and sign correction muxes assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 0e716ac20..2255aafb1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -35,7 +35,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, - output logic [P.NE+1:0] QeM, + output logic [P.NE+1:0] UeM, output logic [P.DIVb+3:0] X, D, // Int-specific input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B @@ -48,10 +48,10 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] AM ); - logic [P.DIVb:0] Xfract, Dfract; + logic [P.DIVb:0] Xnorm, Dnorm; logic [P.DIVb:0] PreSqrtX; logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed - logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) + logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) @@ -106,9 +106,9 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( lzc #(P.DIVb+1) lzcX (IFX, ell); lzc #(P.DIVb+1) lzcY (IFD, mE); - // Normalization shift: shift off leading one - assign Xfract = (IFX << ell); - assign Dfract = (IFD << mE); + // Normalization shift: shift leading one into most significant bit + assign Xnorm = (IFX << ell); + assign Dnorm = (IFD << mE); ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary @@ -133,10 +133,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] TotalIntBits, IntSteps; /* verilator lint_off WIDTH */ + // n = k*ceil((r+p)/rk) - 1 assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits + assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps /* verilator lint_on WIDTH */ @@ -150,18 +151,25 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // Floating-Point Preprocessing - // append leading 1 (for nonzero inputs) + // Extend to Q4.b format // shift square root to be in range [1/4, 1) // Normalized numbers are shifted right by 1 if the exponent is odd // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. // NOTE: there might be a discrepancy that X is never right shifted by 2. However - // it comes out in the wash and gives the right answer. Investigate later if possible. + // it comes out in the wash and gives the right answer. Investigate later if possible. *** ////////////////////////////////////////////////////// - assign DivX = {3'b000, Xfract}; + assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(P.DIVb+1) sqrtxmux(Xfract, {1'b0, Xfract[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter + // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2) + // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even + // Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraciton bits + // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4) + // For Radix 2, this gives 3 leading 1s, followed by the fraction bits + // For Radix 4, this gives 2 leading 1s, followed by the fraction bits (and a zero in the lsb) + mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -177,11 +185,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( end // Divisior register - flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dfract}, D); + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D); // Floating-point exponent - fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM); + fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE)); + flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index f71999471..ffd9cf49a 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) // divide signals - logic [P.DIVb:0] QmM; // fdivsqrt signifcand - logic [P.NE+1:0] QeM; // fdivsqrt exponent + logic [P.DIVb:0] UmM; // fdivsqrt signifcand + logic [P.NE+1:0] UeM; // fdivsqrt exponent logic DivStickyM; // fdivsqrt sticky bit logic FDivDoneE, IFDivStartE; // fdivsqrt control signals logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) @@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, - .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, - .QmM, .FIntDivResultM); + .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM, + .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), @@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////////////////////////////////////////// postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), - .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), - .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index d560714db..380f8f5e6 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -27,8 +27,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module divshiftcalc import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb:0] DivQm, // divsqrt significand - input logic [P.NE+1:0] DivQe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input output logic DivResSubnorm, // is the divsqrt result subnormal @@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( // is the result subnormal // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes - assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]); + assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]); // if the result is subnormal - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivQe+NF+1-1 - assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe; + // Left shift amount = DivUe+NF+1-1 + assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe; assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivQe+1 - // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivUe+1 + // 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF); @@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; // pre-shift the divider result for normalization - assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; + assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index ee96b34d2..05db352cd 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit - input logic [P.NE+1:0] DivQe, // divsqrt exponent - input logic [P.DIVb:0] DivQm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand // conversion signals input logic CvtCs, // the result's sign input logic [P.NE:0] CvtCe, // the calculated expoent @@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // division singals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input - logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift + logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift logic DivByZero; // divide by zero flag logic DivResSubnorm; // is the divsqrt result subnormal logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed) @@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); // select which unit's output to shift always_comb @@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // correct for LZA/divsqrt error shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, - .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); + .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // calulate result sign used in rounding unit roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, + round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 0a5d9ecc5..e01ff376b 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is a division opperation being done input logic DivSticky, // divsqrt sticky bit - input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent + input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent // cvt input logic CvtOp, // is a convert opperation being done input logic ToInt, // is the cvt op a cvt to integer @@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) ( case(PostProcSel) 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt - // 2'b01: Me = DivDone ? Qe : '0; // divide - 2'b01: Me = Qe; // divide + // 2'b01: Me = DivDone ? Ue : '0; // divide + 2'b01: Me = Ue; // divide default: Me = '0; endcase diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 9e0473667..f5860b42d 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is it a divsqrt opperation input logic DivResSubnorm, // is the divsqrt result subnormal - input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent + input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) //fma input logic FmaOp, // is it an fma opperation @@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // output output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction - output logic [P.NE+1:0] Qe // corrected exponent for divider + output logic [P.NE+1:0] Ue // corrected exponent for divider ); logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction @@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // correct the shifting of the divsqrt caused by producing a result in (2, .5] range // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) - assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); + assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1)); assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2]; assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1]; mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); @@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift - assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1}; + assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; endmodule From 083ed09f1e759b09720b0bc207d112c19b1be543 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 11:25:54 -0800 Subject: [PATCH 04/11] Reduced duplicated logic in fdivsqrtcycles --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 2122317fe..e9fbc6042 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -33,7 +33,10 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.DIVBLEN:0] nE, output logic [P.DURLEN-1:0] CyclesE ); + logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits + logic [P.DURLEN-1:0] fpcycles; // number of cycles for floating-point operation + // DIVN = P.NF+3 // NS = NF + 1 // N = NS or NS+2 for div/sqrt. @@ -68,8 +71,10 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits -1)/(P.RK) + 1; - else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + assign fpcycles = (fbits-1)/(P.RK) + 1; + + if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : fpcycles; + else CyclesE = fpcycles; end /* verilator lint_on WIDTH */ From b8bdb1c7d148ed6609594ce7530bbe68e53bedaa Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:00:27 -0800 Subject: [PATCH 05/11] Simplified cycle count logic --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 22 +++++++++++++--------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index e9fbc6042..df581701b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] nE, + input logic [P.DIVBLEN:0] IntResultBits, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits - logic [P.DURLEN-1:0] fpcycles; // number of cycles for floating-point operation + logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits + logic [P.DIVBLEN:0] ResultBits; // number of result bits; // DIVN = P.NF+3 // NS = NF + 1 @@ -68,13 +68,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( endcase always_comb begin - if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? - // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - assign fpcycles = (fbits-1)/(P.RK) + 1; + if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? + else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : fpcycles; - else CyclesE = fpcycles; + if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; + else ResultBits = FPResultBits; + + assign CyclesE = (ResultBits-1)/(P.RK) + 1; end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 2255aafb1..ab0941aca 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,6 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVBLEN:0] IntResultBits; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -122,7 +123,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros) - mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + + /* verilator lint_off WIDTH */ + assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; @@ -131,15 +136,14 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( if (P.LOGRK > 0) begin // more than 1 bit per cycle logic [P.LOGRK-1:0] IntTrunc, RightShiftX; - logic [P.DIVBLEN:0] TotalIntBits, IntSteps; + logic [P.DIVBLEN:0] IntSteps; /* verilator lint_off WIDTH */ // n = k*ceil((r+p)/rk) - 1 - assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) - assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit - assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator + assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div + assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit + assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; @@ -192,7 +196,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs // pipeline registers From 72ad1d361c939c36cb99627d941a74e03433697b Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:28:57 -0800 Subject: [PATCH 06/11] Simplified IntDivNormShift --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 9 +++++- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 41 ++++++++++++++-------------- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 60e42f457..751486f86 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] nM, mM; // Shift amounts + logic [P.DIVBLEN:0] mM, IntDivNormShiftM; // Shift amounts logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases @@ -77,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .nM, .mM, .AM, + .BZeroM, .IntDivNormShiftM, .mM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -96,6 +96,6 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .UmM, .WZeroE, .DivStickyM, // Int-specific - .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntDivNormShiftM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index df581701b..bba6e8005 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -67,6 +67,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( P.Q_FMT: Nf = P.Q_NF; endcase + // Cycle logic + // P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk. + // Integer division needs p fractional + r integer result bits + // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits + // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + always_comb begin if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs @@ -74,7 +81,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; else ResultBits = FPResultBits; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; + assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 2b9be54a7..58649e3a8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic Firstun, SqrtM, SpecialCaseM, input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - input logic [P.DIVBLEN:0] nM, mM, + input logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, @@ -111,7 +111,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Select quotient or remainder and do normalization shift localparam DIVa = (P.DIVb+1-P.XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result - mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); + mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftM, (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); assign PreIntResultM = $signed(PreResultM >>> NormShiftM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index ab0941aca..35757e480 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] nM, mM, + output logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, output logic [P.XLEN-1:0] AM @@ -53,7 +53,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs logic [P.DIVBLEN:0] IntResultBits; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division @@ -126,27 +126,21 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps - + // calculate right shift amount RightShiftX to complete in discrete number of steps if (P.LOGRK > 0) begin // more than 1 bit per cycle logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; - /* verilator lint_off WIDTH */ - // n = k*ceil((r+p)/rk) - 1 - assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit + /* verilator lint_offf WIDTH */ assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting - assign nE = p; assign DivXShifted = DivX; end end else begin @@ -199,17 +193,22 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs + logic [P.DIVBLEN:0] IntDivNormShiftE; + /* verilator lint_off WIDTH */ + assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + /* verilator lint_on WIDTH */ + // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); + flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntDivNormShiftE, IntDivNormShiftM); + flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end endmodule From f539f6171b6f63c1e12eaa0567fc70303812b7a8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:55:36 -0800 Subject: [PATCH 07/11] Simplified integer postnormalization shift --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 +++--- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 7 ++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 12 ++++++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 751486f86..ac5c2c338 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] mM, IntDivNormShiftM; // Shift amounts + logic [P.DIVBLEN:0] IntNormShiftM; // Integer normalizatoin shift amount logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases @@ -77,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .IntDivNormShiftM, .mM, .AM, + .BZeroM, .IntNormShiftM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -96,6 +96,6 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .UmM, .WZeroE, .DivStickyM, // Int-specific - .IntDivNormShiftM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 58649e3a8..3b6115201 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic Firstun, SqrtM, SpecialCaseM, input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - input logic [P.DIVBLEN:0] mM, IntDivNormShiftM, + input logic [P.DIVBLEN:0] IntNormShiftM, output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, @@ -96,7 +96,6 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Integer quotient or remainder correction, normalization, and special cases if (P.IDIV_ON_FPU) begin:intpostproc // Int supported - logic [P.DIVBLEN:0] NormShiftM; logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; logic signed [P.DIVb+3:0] PreResultM, PreIntResultM; @@ -110,10 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); // Select quotient or remainder and do normalization shift - localparam DIVa = (P.DIVb+1-P.XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result - mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftM, (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); - assign PreIntResultM = $signed(PreResultM >>> NormShiftM); + assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM); // special case logic // terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B| diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 35757e480..137f54d99 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] mM, IntDivNormShiftM, + output logic [P.DIVBLEN:0] IntNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, output logic [P.XLEN-1:0] AM @@ -193,10 +193,15 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs - logic [P.DIVBLEN:0] IntDivNormShiftE; + logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; + logic RemOpE; + /* verilator lint_off WIDTH */ assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + assign IntRemNormShiftE = mE + (P.DIVb+1-P.XLEN); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ + assign RemOpE = Funct3E[1]; + mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); @@ -204,8 +209,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntDivNormShiftE, IntDivNormShiftM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); From d92f3e0216a398a33d56523aad4a255b97f19b85 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 16:42:32 -0800 Subject: [PATCH 08/11] fdivsqrt cleanup --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 3b6115201..e9fd2fd2c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -118,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( if (BZeroM) begin // Divide by zero if (RemOpM) IntDivResultM = AM; else IntDivResultM = {(P.XLEN){1'b1}}; - end else if (ALTBM) begin // Numerator is zero + end else if (ALTBM) begin // Numerator is small if (RemOpM) IntDivResultM = AM; else IntDivResultM = '0; end else IntDivResultM = PreIntResultM[P.XLEN-1:0]; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 137f54d99..66ba957e8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -198,7 +198,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( /* verilator lint_off WIDTH */ assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain - assign IntRemNormShiftE = mE + (P.DIVb+1-P.XLEN); // m + b - (N-1) for remainder normalization shift + assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ assign RemOpE = Funct3E[1]; mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); From 1302a89baf7aa4a902d95faf48682fb97f1dcd46 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 18:01:13 -0800 Subject: [PATCH 09/11] divider cleanup --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 8 ++++---- src/fpu/unpackinput.sv | 6 ------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index bba6e8005..d5c571940 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] IntResultBits, + input logic [P.DIVBLEN:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits - logic [P.DIVBLEN:0] ResultBits; // number of result bits; + logic [P.DURLEN+1:0] Nf, FPResultBitsE; // number of fractional bits + logic [P.DIVBLEN:0] ResultBitsE; // number of result bits; // DIVN = P.NF+3 // NS = NF + 1 @@ -72,16 +72,16 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // Integer division needs p fractional + r integer result bits // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits - // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? - else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 and +0 rather than +2; is it related to DIVCOPIES logic below? + else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; - else ResultBits = FPResultBits; + if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; + else ResultBitsE = FPResultBitsE; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) + assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 66ba957e8..e950a40bd 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,7 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs - logic [P.DIVBLEN:0] IntResultBits; // bits in integer result + logic [P.DIVBLEN:0] IntResultBitsE; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -126,7 +126,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) @@ -137,7 +137,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; /* verilator lint_offf WIDTH */ - assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount + assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting @@ -190,7 +190,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index c551e8173..b3d7f901e 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing always_comb if (BadNaNBox) begin -// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; end else PostBox = In; @@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) P.FMT: PostBox = In; -// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; -// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]}; P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}}; default: PostBox = 'x; @@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) 2'b11: PostBox = In; -// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]}; -// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]}; -// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]}; 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}}; 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}}; 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}}; From 6ed5ba4a85c88529775df12641fad004c56b9fc3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 18:19:41 -0800 Subject: [PATCH 10/11] Simplified out LOGRK parameter --- config/shared/config-shared.vh | 5 ++--- config/shared/parameter-defs.vh | 1 - src/cvw.sv | 1 - src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 5 ++--- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 17b1ede83..10b56f24e 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -97,11 +97,10 @@ localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS); localparam DIVN = ((NF+2