From b315ead57507cc884d070c0f77e0a05f875f705b Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 14:28:57 -0800 Subject: [PATCH] Simplified IntDivNormShift --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 9 +++++- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 41 ++++++++++++++-------------- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 60e42f457..751486f86 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,7 +67,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] nM, mM; // Shift amounts + logic [P.DIVBLEN:0] mM, IntDivNormShiftM; // Shift amounts logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases @@ -77,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .nM, .mM, .AM, + .BZeroM, .IntDivNormShiftM, .mM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -96,6 +96,6 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .UmM, .WZeroE, .DivStickyM, // Int-specific - .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntDivNormShiftM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index df581701b..bba6e8005 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -67,6 +67,13 @@ 
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( P.Q_FMT: Nf = P.Q_NF; endcase + // Cycle logic + // P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk. + // Integer division needs p fractional + r integer result bits + // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits + // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + always_comb begin if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs @@ -74,7 +81,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (P.IDIV_ON_FPU) ResultBits = IntDivE ? 
IntResultBits : FPResultBits; else ResultBits = FPResultBits; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; + assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 2b9be54a7..58649e3a8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( input logic Firstun, SqrtM, SpecialCaseM, input logic [P.XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, - input logic [P.DIVBLEN:0] nM, mM, + input logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic [P.DIVb:0] UmM, // result significand output logic WZeroE, output logic DivStickyM, @@ -111,7 +111,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( // Select quotient or remainder and do normalization shift localparam DIVa = (P.DIVb+1-P.XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result - mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); + mux2 #(P.DIVBLEN+1) normshiftmux(IntDivNormShiftM, (mM + (P.DIVBLEN+1)'(DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); assign PreIntResultM = $signed(PreResultM >>> NormShiftM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index ab0941aca..35757e480 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] nM, mM, + output logic [P.DIVBLEN:0] mM, IntDivNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, 
output logic [P.XLEN-1:0] AM @@ -53,7 +53,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs logic [P.DIVBLEN:0] IntResultBits; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division @@ -126,27 +126,21 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps - + // calculate right shift amount RightShiftX to complete in discrete number of steps if (P.LOGRK > 0) begin // more than 1 bit per cycle logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; - /* verilator lint_off WIDTH */ - // n = k*ceil((r+p)/rk) - 1 - assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit + /* verilator lint_off WIDTH */ assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to 
complete in nE steps + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting - assign nE = p; assign DivXShifted = DivX; end end else begin @@ -199,17 +193,22 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs + logic [P.DIVBLEN:0] IntDivNormShiftE; + /* verilator lint_off WIDTH */ + assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + /* verilator lint_on WIDTH */ + // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); + flopen #(P.DIVBLEN+1) nsreg(clk, IFDivStartE, IntDivNormShiftE, IntDivNormShiftM); + flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end endmodule