From 7d0d9dcebe704a464cb156e635bf2d215762daa6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 10 Nov 2023 18:01:13 -0800 Subject: [PATCH] divider cleanup --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 18 +++++++++--------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 8 ++++---- src/fpu/unpackinput.sv | 6 ------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index bba6e8005..d5c571940 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,12 +30,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] IntResultBits, + input logic [P.DIVBLEN:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, FPResultBits; // number of fractional bits - logic [P.DIVBLEN:0] ResultBits; // number of result bits; + logic [P.DURLEN+1:0] Nf, FPResultBitsE; // number of fractional bits + logic [P.DIVBLEN:0] ResultBitsE; // number of result bits; // DIVN = P.NF+3 // NS = NF + 1 @@ -72,16 +72,16 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // Integer division needs p fractional + r integer result bits // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits // FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits - // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBits / rk) + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 rather than +2; is it related to DIVCOPIES logic below? - else FPResultBits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1 and +0 rather than +2; is it related to DIVCOPIES logic below? + else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) ResultBits = IntDivE ? IntResultBits : FPResultBits; - else ResultBits = FPResultBits; + if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; + else ResultBitsE = FPResultBitsE; - assign CyclesE = (ResultBits-1)/(P.RK) + 1; // ceil (ResultBits/rk) + assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 66ba957e8..e950a40bd 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -54,7 +54,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] UeE; // Result Exponent (FP only) logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVBLEN:0] mE, ell; // Leading zeros of inputs - logic [P.DIVBLEN:0] IntResultBits; // bits in integer result + logic [P.DIVBLEN:0] IntResultBitsE; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division @@ -126,7 +126,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); /* verilator lint_off WIDTH */ - assign IntResultBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) @@ -137,7 +137,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.LOGRK-1:0] IntTrunc, RightShiftX; logic [P.DIVBLEN:0] IntSteps; /* verilator lint_offf WIDTH */ - assign RightShiftX = P.RK - 1 - ((IntResultBits - 1) % P.RK); // Right shift amount + assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting @@ -190,7 +190,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBits, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs logic [P.DIVBLEN:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index c551e8173..b3d7f901e 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing always_comb if (BadNaNBox) begin -// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; end else PostBox = In; @@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) P.FMT: PostBox = In; -// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; -// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]}; P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}}; default: PostBox = 'x; @@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) 2'b11: PostBox = In; -// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]}; -// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]}; -// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]}; 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}}; 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}}; 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};