Merge pull request #266 from davidharrishmc/dev

FDivSqrt cleanup
2023-04-21 20:23:23 -05:00 · 2023-04-21 20:23:23 -05:00 · 884c3c22d5
commit 884c3c22d5
parent 94d1533264 e11212598f
5 changed files with 85 additions and 57 deletions
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@ -138,10 +138,10 @@ module fctrl (
                                  endcase
                      7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
                                                ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass
-                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
+                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
-                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register
+                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q  fp to int register
-                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
+                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
-                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x   to fp reg
+                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x  int to fp reg
                      7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
                                                ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
                      7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@ -62,7 +62,7 @@ module fdivsqrt(
  logic [`DIVb+1:0]           FirstC;                       // Step tracker
  logic                       Firstun;                      // Quotient selection
  logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         cycles;                       // FSM cycles
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
  logic                       DivStartE;                    // Enable signal for flops during stall
@ -76,7 +76,7 @@ module fdivsqrt(
  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
-    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles,
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
    // Int-specific 
    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
    .BZeroM, .nM, .mM, .AM, 
@ -85,7 +85,7 @@ module fdivsqrt(
  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
-    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles,
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
    // Int-specific 
    .IDivStartE, .ISpecialCaseE, .IntDivE);
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@ -1,10 +1,10 @@
 ///////////////////////////////////////////
-// fdivsqrt.sv
+// fdivsqrtcycles.sv
 //
 // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
 // Modified: 18 April 2022
 //
-// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// Purpose: Determine number of cycles for divsqrt
 // 
 // Documentation: RISC-V System on Chip Design Chapter 13
 //
@ -33,7 +33,7 @@ module fdivsqrtcycles(
  input  logic                SqrtE,
  input  logic                IntDivE,
  input  logic [`DIVBLEN:0]   nE,
-  output logic [`DURLEN-1:0]  cycles
+  output logic [`DURLEN-1:0]  CyclesE
 );
  logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
  // DIVN = `NF+3
@ -68,8 +68,8 @@ module fdivsqrtcycles(
  always_comb begin 
    if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
    else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    if (`IDIV_ON_FPU) cycles =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    if (`IDIV_ON_FPU) CyclesE =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-    else              cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    else              CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
  end 
  /* verilator lint_on WIDTH */
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@ -39,7 +39,7 @@ module fdivsqrtfsm(
  input  logic               StallM, FlushE,
  input  logic               IntDivE,
  input  logic               ISpecialCaseE,
-  input  logic [`DURLEN-1:0] cycles,
+  input  logic [`DURLEN-1:0] CyclesE,
  output logic               IFDivStartE,
  output logic               FDivBusyE, FDivDoneE,
  output logic               SpecialCaseM
@ -67,7 +67,7 @@ module fdivsqrtfsm(
          state <= #1 IDLE; 
      end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
 //       end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE
-          step <= cycles; 
+          step <= CyclesE; 
          if (SpecialCaseE) state <= #1 DONE;
          else              state <= #1 BUSY;
      end else if (state == BUSY) begin 
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@ -43,44 +43,49 @@ module fdivsqrtpreproc (
  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic                IntDivE, W64E,
  output logic                ISpecialCaseE,
-  output logic [`DURLEN-1:0]  cycles,
+  output logic [`DURLEN-1:0]  CyclesE,
  output logic [`DIVBLEN:0]   nM, mM,
  output logic                NegQuotM, ALTBM, IntDivM, W64M,
  output logic                AsM, BZeroM,
  output logic [`XLEN-1:0]    AM
 );
-  logic [`DIVb-1:0]           XPreproc, DPreproc;
+  logic [`DIVb-1:0]           Xfract, Dfract;
  logic [`DIVb:0]             PreSqrtX;
  logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
  logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
  logic [`DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
-  logic [`DIVBLEN:0]          mE, nE, ell;                             // Leading zeros of inputs
+  logic [`DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
  logic                       NumerZeroE;                          // Numerator is zero (X or A)
  logic                       AZeroE, BZeroE;                      // A or B is Zero for integer division
-  logic                       signedDiv;                           // signed division
+  logic                       SignedDivE;                          // signed division
  logic                       NegQuotE;                            // Integer quotient is negative
  logic                       AsE, BsE;                            // Signs of integer inputs
  logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
  logic  ALTBE;
  //////////////////////////////////////////////////////
  // Integer Preprocessing
  //////////////////////////////////////////////////////
  if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
    logic [`XLEN-1:0] BE, PosA, PosB;
    // Extract inputs, signs, zero, depending on W64 mode if applicable
-    assign signedDiv = ~Funct3E[0];
+    assign SignedDivE = ~Funct3E[0];
    // Source handling
    if (`XLEN==64) begin // 64-bit, supports W64
-      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
-      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE);
+      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
    end else begin // 32 bits only
      assign AE = ForwardedSrcAE;
      assign BE = ForwardedSrcBE;
     end
    assign AZeroE = ~(|AE);
    assign BZeroE = ~(|BE);
-    assign AsE = AE[`XLEN-1] & signedDiv;
+    assign AsE = AE[`XLEN-1] & SignedDivE;
-    assign BsE = BE[`XLEN-1] & signedDiv; 
+    assign BsE = BE[`XLEN-1] & SignedDivE; 
    assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
    // Force integer inputs to be positive
@ -90,35 +95,35 @@ module fdivsqrtpreproc (
    // Select integer or floating point inputs
    mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
    mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
-
+    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
  end else begin // Int not supported
    assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
    assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
    assign NumerZeroE = XZeroE;
  end
  //////////////////////////////////////////////////////
  // Integer & FP leading zero and normalization shift
  //////////////////////////////////////////////////////
  // count leading zeros for Subnorm FP and to normalize integer inputs
  lzc #(`DIVb) lzcX (IFX, ell);
  lzc #(`DIVb) lzcY (IFD, mE);
  // Normalization shift: shift off leading one
-  assign XPreproc = (IFX << ell) << 1;
+  assign Xfract = (IFX << ell) << 1;
-  assign DPreproc = (IFD << mE)  << 1; 
+  assign Dfract = (IFD << mE)  << 1; 
-  // append leading 1 (for nonzero inputs)
+  // *** CT: move to fdivsqrtintpreshift
  // shift square root to be in range [1/4, 1)
  // Normalized numbers are shifted right by 1 if the exponent is odd
  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
  assign DivX = {3'b000, ~NumerZeroE, XPreproc};
-   // Divisior register
+  //////////////////////////////////////////////////////
-  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
+  // Integer Right Shift to digit boundary
  //  Determine DivXShifted (X shifted to digit boundary)
  //  and nE (number of fractional digits)
  //////////////////////////////////////////////////////
  // ***CT: factor out fdivsqrtcycles
  if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
    logic [`DIVBLEN:0] ZeroDiff, p;
    logic  ALTBE;
    // calculate number of fractional bits p
    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
@ -128,31 +133,68 @@ module fdivsqrtpreproc (
    // Integer special cases (terminate immediately)
    assign ISpecialCaseE = BZeroE | ALTBE;
  /* verilator lint_off WIDTH */
    // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
    if (`LOGRK > 0) begin // more than 1 bit per cycle
      logic [`LOGRK-1:0] IntTrunc, RightShiftX;
      logic [`DIVBLEN:0] TotalIntBits, IntSteps;
-
+      /* verilator lint_off WIDTH */
      assign TotalIntBits = `LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
      assign IntTrunc = TotalIntBits % `RK;                       // Truncation check for ceiling operator
      assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc;     // Number of steps for int div
      assign nE = (IntSteps * `DIVCOPIES) - 1;                    // Fractional digits
      assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK);  // Right shift amount
      assign DivXShifted = DivX >> RightShiftX;                   // shift X by up to R*K-1 to complete in nE steps
      /* verilator lint_on WIDTH */
    end else begin // radix 2 1 copy doesn't require shifting
      assign nE = p; 
      assign DivXShifted = DivX;
    end
-  /* verilator lint_on WIDTH */
+  end else begin
    assign ISpecialCaseE = 0;
  end
-    // Selet integer or floating-point operands
+  // CT *** fdivsqrtfplead1
-    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+
  //////////////////////////////////////////////////////
  // Floating-Point Preprocessing
  // append leading 1 (for nonzero inputs)
  // shift square root to be in range [1/4, 1)
  // Normalized numbers are shifted right by 1 if the exponent is odd
  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
  //////////////////////////////////////////////////////
  assign DivX = {3'b000, ~NumerZeroE, Xfract};
  // Sqrt is initialized on step one as R(X-1), so depends on Radix
  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
  //////////////////////////////////////////////////////
  // Select integer or floating-point operands
  //////////////////////////////////////////////////////
  if (`IDIV_ON_FPU) begin
    mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
  end else begin
    assign X = PreShiftX;
  end
   // Divisor register
  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
  // Floating-point exponent
  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
  // Number of FSM cycles (to FSM)
  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
  if (`IDIV_ON_FPU) begin:intpipelineregs
    // pipeline registers
-    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
+    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
    flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
    flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
    flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
@ -162,21 +204,7 @@ module fdivsqrtpreproc (
    flopen #(`XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
    if (`XLEN==64) 
      flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
  end else begin
    assign NumerZeroE = XZeroE;
    assign X = PreShiftX;
  end
  // Sqrt is initialized on step one as R(X-1), so depends on Radix
  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
  // Floating-point exponent
  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
  // Number of FSM cycles (to FSM)
  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
 endmodule