diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
index e9fbc6042..df581701b 100644
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
   input logic [P.FMTBITS-1:0] FmtE,
   input logic SqrtE,
   input logic IntDivE,
-  input logic [P.DIVBLEN:0] nE,
+  input logic [P.DIVBLEN:0] IntResultBits,
   output logic [P.DURLEN-1:0] CyclesE
 );
 
-  logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
-  logic [P.DURLEN-1:0] fpcycles; // number of cycles for floating-point operation
+  logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits; number of floating-point result bits
+  logic [P.DIVBLEN:0] ResultBits; // number of result bits (integer or floating-point, selected below)
 
   // DIVN = P.NF+3
   // NS = NF + 1
@@ -68,13 +68,13 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
     endcase
 
   always_comb begin
-    if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below?
-    // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
-    else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    assign fpcycles = (fbits-1)/(P.RK) + 1;
+    if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below?
+    else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
 
-    if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : fpcycles;
-    else CyclesE = fpcycles;
+    if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits;
+    else ResultBits = FPResultBits;
+
+    CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil(ResultBits/P.RK) for ResultBits >= 1; plain blocking assignment, not a procedural continuous 'assign'
   end
   /* verilator lint_on WIDTH */
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 2255aafb1..ab0941aca 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -54,6 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
   logic [P.NE+1:0] UeE; // Result Exponent (FP only)
   logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
   logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
+  logic [P.DIVBLEN:0] IntResultBits; // bits in integer result
   logic NumerZeroE; // Numerator is zero (X or A)
   logic AZeroE, BZeroE; // A or B is Zero for integer division
   logic SignedDivE; // signed division
@@ -122,7 +123,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
   // calculate number of fractional bits p
   assign ZeroDiff = mE - ell; // Difference in number of leading zeros
   assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
-  mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
+  mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
+
+  /* verilator lint_off WIDTH */
+  assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
+  /* verilator lint_on WIDTH */
 
   // Integer special cases (terminate immediately)
   assign ISpecialCaseE = BZeroE | ALTBE;
@@ -131,15 +136,14 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
 
   if (P.LOGRK > 0) begin // more than 1 bit per cycle
     logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
-    logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
+    logic [P.DIVBLEN:0] IntSteps;
     /* verilator lint_off WIDTH */
     // n = k*ceil((r+p)/rk) - 1
-    assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
-    assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
-    assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
-    assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit
-    assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
-    assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
+    assign IntTrunc = IntResultBits % P.RK; // Truncation check for ceiling operator
+    assign IntSteps = (IntResultBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
+    assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits = total digits - 1 integer digit
+    assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount
+    assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
     /* verilator lint_on WIDTH */
   end else begin // radix 2 1 copy doesn't require shifting
     assign nE = p;
@@ -192,7 +196,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
   flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
 
   // Number of FSM cycles (to FSM)
-  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
+  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE);
 
   if (P.IDIV_ON_FPU) begin:intpipelineregs
     // pipeline registers