Merge pull request #851 from kevindkim723/intdivb

Reduce Bit widths for IDIV on FPU
2025-02-11 06:05:49 +00:00 · 2024-07-05 21:42:19 -07:00 · 2024-07-05 21:42:19 -07:00 · 9f5e7b8653
commit 9f5e7b8653
parent 764f46a174 ec4d4e2a8b
6 changed files with 33 additions and 15 deletions
--- a/config/shared/config-shared.vh
+++ b/config/shared/config-shared.vh
@ -97,13 +97,18 @@ localparam RK          = LOGR*DIVCOPIES;                            // r*k bits
 localparam FPDIVMINb   = NF + 2; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
 localparam DIVMINb     = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b: max(XLEN, FPDIVMINb) when IDIV_ON_FPU is set, otherwise FPDIVMINb
 localparam RESBITS     = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
-
+                 
 // division constants
 localparam FPDUR       = (RESBITS-1)/RK + 1 ;                       // ceiling((r+b)/rk)
 localparam DIVb        = FPDUR*RK - LOGR;                           // divsqrt fractional bits, so total number of bits is a multiple of rk after r integer bits
 localparam DURLEN      = $clog2(FPDUR);                             // enough bits to count the duration
 localparam DIVBLEN     = $clog2(DIVb+1);                            // enough bits to count number of fractional bits + 1 integer bit
 // integer division/remainder constants
 localparam INTRESBITS     = XLEN + LOGR; // number of bits in a result: r integer + XLEN fractional
 localparam INTFPDUR       = (INTRESBITS-1)/RK + 1 ;                 // ceiling((r+XLEN)/rk)
 localparam INTDIVb        = INTFPDUR*RK - LOGR;                     // integer fractional bits: INTFPDUR*rk total bits minus the r integer bits
 // largest length in IEU/FPU
 localparam BASECVTLEN = `max(XLEN, NF); // convert length excluding Zfa fcvtmod.w.d
 localparam CVTLEN = (ZFA_SUPPORTED & D_SUPPORTED) ? `max(BASECVTLEN, 32'd84) : BASECVTLEN; // fcvtmod.w.d needs at least 32+52 because a double with 52 fractional bits might be into upper bits of 32 bit word
--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@ -199,5 +199,7 @@ localparam cvw_t P = '{
  FPDUR       : FPDUR,
  DURLEN      : DURLEN,
  DIVb        : DIVb,
-  DIVBLEN     : DIVBLEN
+  DIVBLEN     : DIVBLEN,
  INTDIVb     : INTDIVb
 };
--- a/src/cvw.sv
+++ b/src/cvw.sv
@ -294,6 +294,9 @@ typedef struct packed {
  int DURLEN     ;
  int DIVb       ;
  int DIVBLEN    ;
 // integer division/remainder constants
  int INTDIVb    ;
 } cvw_t;
 endpackage
--- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@ -45,7 +45,8 @@ module fdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
  output logic [P.XLEN-1:0]    FIntDivResultM     // U/Q(XLEN.0)
 );
-  logic [P.DIVb+3:0]         W, Sum;
+  logic [P.DIVb+3:0]         Sum;
  logic [P.INTDIVb+3:0]      W;
  logic [P.DIVb:0]           PreUmM;
  logic                      NegStickyM;
  logic                      weq0E, WZeroM;
@ -97,21 +98,27 @@ module fdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
  // Integer quotient or remainder correction, normalization, and special cases
  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
-    logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
+    logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
-    logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
+    logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM;
    logic [P.INTDIVb+3:0] DTrunc, SumTrunc;
    assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
    assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];
    assign W = $signed(SumTrunc) >>> P.LOGR;
    assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
    assign W = $signed(Sum) >>> P.LOGR;
    assign UnsignedQuotM = {3'b000, PreUmM};
    // Integer remainder: sticky and sign correction muxes
    assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
-    mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
+    mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);
-    mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
+
    mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
    // Select quotient or remainder and do normalization shift
-    mux2 #(P.DIVb+4)    presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
+    mux2 #(P.INTDIVb+4)    presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
-    assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM); 
+    assign PreResultShiftedM = PreResultM >> IntNormShiftM;
    mux2 #(P.INTDIVb+4)    preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM);
    // special case logic
    // terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@ -119,7 +119,7 @@ module fdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
  //////////////////////////////////////////////////////
  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
-    logic [P.DIVBLEN-1:0] ZeroDiff, p;
+    logic [P.DIVBLEN-1:0] ZeroDiff,p;
    // calculate number of fractional bits p
    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
@ -218,8 +218,8 @@ module fdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
    logic               RemOpE;
    /* verilator lint_off WIDTH */
-    assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  n = (Cycles * k - 1)
+    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  n = (Cycles * k - 1)
-    assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
+    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
    /* verilator lint_on WIDTH */
    assign RemOpE = Funct3E[1];
    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@ -1802,6 +1802,7 @@ string imperas32f[] = '{
    };
  string arch64d_fma[] = '{
    `RISCVARCHTEST,
    //"rv64i_m/D/src/fmadd.d_b15-01.S",