Revert "Changed weird D sizing. Better names in preproc. Finalized Int/Float input to divider."

This reverts commit fb221d7b64.
2025-02-11 06:05:49 +00:00 · 2022-12-04 00:01:58 +00:00 · 2022-12-04 00:01:58 +00:00 · 350fdd944d
commit 350fdd944d
parent 87ce09f7d9
10 changed files with 76 additions and 185923 deletions
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -58,8 +58,8 @@ module fdivsqrt(

  logic [`DIVb+3:0]  WS, WC;
  logic [`DIVb+3:0] X;
-  logic [`DIVb-1:0] D;
-  logic [`DIVb-1:0] DPreproc;
+  logic [`DIVN-2:0]  D; // U0.N-1
+  logic [`DIVN-2:0] Dpreproc;
  logic [`DIVb:0] FirstU, FirstUM;
  logic [`DIVb+1:0] FirstC;
  logic Firstun;
@@ -71,7 +71,7 @@ module fdivsqrt(

  fdivsqrtpreproc fdivsqrtpreproc(
    .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
-    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc, 
+    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, 
    .n, .m, .OTFCSwap, .ALTBM, .BZero, .As,
    .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
  fdivsqrtfsm fdivsqrtfsm(
@@ -81,7 +81,7 @@ module fdivsqrt(
    .XInfE, .YInfE, .WZero, .SpecialCaseM);
  fdivsqrtiter fdivsqrtiter(
    .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM, 
-    .X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
+    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
    .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
    .FDivBusyE);
  fdivsqrtpostproc fdivsqrtpostproc(
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -40,34 +40,41 @@ module fdivsqrtiter(
 //  input  logic SqrtM,
  input  logic OTFCSwap,
  input  logic [`DIVb+3:0] X,
-  input  logic [`DIVb-1:0] DPreproc,
-  output logic [`DIVb-1:0] D,
-  output logic [`DIVb:0]   FirstU, FirstUM,
+  input  logic [`DIVN-2:0] Dpreproc,
+  output logic [`DIVN-2:0]  D, // U0.N-1
+  output logic [`DIVb:0] FirstU, FirstUM,
  output logic [`DIVb+1:0] FirstC,
  output logic             Firstun,
-  output logic [`DIVb+3:0] FirstWS, FirstWC
+  output logic [`DIVb+3:0]  FirstWS, FirstWC
 );

+//QLEN = 1.(number of bits created for division)
+// N is NF+1 or XLEN
+// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
+// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0
+// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
+// U/UM should be 1.b so b+1 bits or b:0
+// C needs to be the length of the final fraction 0.b so b or b-1:0
 /* verilator lint_off UNOPTFLAT */
-  logic [`DIVb+3:0]       WSNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]       WCNext[`DIVCOPIES-1:0]; // Q4.b
-  logic [`DIVb+3:0]       WS[`DIVCOPIES:0];       // Q4.b
-  logic [`DIVb+3:0]       WC[`DIVCOPIES:0];       // Q4.b
-  logic [`DIVb:0]         U[`DIVCOPIES:0];        // U1.b
-  logic [`DIVb:0]         UM[`DIVCOPIES:0];       // U1.b
-  logic [`DIVb:0]         UNext[`DIVCOPIES-1:0];  // U1.b
-  logic [`DIVb:0]         UMNext[`DIVCOPIES-1:0]; // U1.b
-  logic [`DIVb+1:0]       C[`DIVCOPIES:0];        // Q2.b
-  logic [`DIVb+1:0]       initC;                  // Q2.b
-  logic [`DIVCOPIES-1:0]  un;
+  logic [`DIVb+3:0]  WSNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]  WCNext[`DIVCOPIES-1:0]; // Q4.b
+  logic [`DIVb+3:0]  WS[`DIVCOPIES:0]; // Q4.b
+  logic [`DIVb+3:0]  WC[`DIVCOPIES:0]; // Q4.b
+  logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
+  logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
+  logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
+  logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
+  logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
+  logic [`DIVb+1:0] initC; // Q2.b
+  logic [`DIVCOPIES-1:0] un; 

 /* verilator lint_on UNOPTFLAT */
-  logic [`DIVb+3:0] WSN, WCN;        // Q4.b
-  logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
+  logic [`DIVb+3:0]  WSN, WCN; // Q4.N-1
+  logic [`DIVb+3:0]  DBar, D2, DBar2; // Q4.N-1
  logic [`DIVb+1:0] NextC;
  logic [`DIVb+1:0] CMux;
-  logic [`DIVb:0]   UMux, UMMux;
-  logic [`DIVb:0]   initU, initUM;
+  logic [`DIVb:0] UMux, UMMux;
+  logic [`DIVb:0] initU, initUM;


  // Top Muxes and Registers
@@ -78,15 +85,15 @@ module fdivsqrtiter(
  // Residual WS/SC registers/initializaiton mux
  mux2   #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
  mux2   #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
-  flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
-  flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
+  flopen   #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
+  flopen   #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);

  // UOTFC Result U and UM registers/initialization mux
  // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
  assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
  assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; 
-  mux2   #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
-  mux2   #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
+  mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
+  mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
  flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
  flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);

@@ -99,15 +106,15 @@ module fdivsqrtiter(
  flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);

   // Divisior register
-  flopen #(`DIVb) dflop(clk, IFDivStartE, DPreproc, D);
+  flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);

  // Divisor Selections
  //  - choose the negitive version of what's being selected
  //  - D is only the fraction
-  assign DBar    = {3'b111, 1'b0, ~D};
+  assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
  if(`RADIX == 4) begin : d2
-    assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
-    assign D2    = {2'b00, 1'b1, D, 1'b0};
+    assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
+    assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
  end

  // k=DIVCOPIES of the recurrence logic
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -32,7 +32,7 @@

 module fdivsqrtpostproc(
  input  logic [`DIVb+3:0] WS, WC,
-  input  logic [`DIVb-1:0]  D,
+  input  logic [`DIVN-2:0]  D, // U0.N-1
  input  logic [`DIVb:0] FirstU, FirstUM, 
  input  logic [`DIVb+1:0] FirstC,
  input  logic Firstun,
@@ -46,7 +46,7 @@ module fdivsqrtpostproc(
  output logic DivSM
 );
  
-  logic [`DIVb+3:0] W, Sum, RemDM;
+  logic [`DIVb+3:0] W, Sum, RemD;
  logic [`DIVb:0] PreQmM;
  logic NegStickyM, PostIncM;
  logic weq0;
@@ -78,14 +78,14 @@ module fdivsqrtpostproc(
  assign Sum = WC + WS;
  assign W = $signed(Sum) >>> `LOGR;
  assign NegStickyM = W[`DIVb+3];
-  assign RemDM = {4'b0000, D};
+  assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}};

  // Integer division: sign handling for div and rem
  always_comb 
    if (~As)
      if (NegStickyM) begin
        NormQuotM = FirstUM;
-        NormRemM  = W + RemDM;
+        NormRemM  = W + RemD;
        PostIncM  = 0;
      end else begin
        NormQuotM = FirstU;
@@ -99,7 +99,7 @@ module fdivsqrtpostproc(
        PostIncM  = 0;
      end else begin 
        NormQuotM = FirstU;
-        NormRemM  = W - RemDM;
+        NormRemM  = W - RemD;
        PostIncM  = 1;
      end

--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -45,21 +45,22 @@ module fdivsqrtpreproc (
  output logic OTFCSwap, ALTBM, BZero, As,
  output logic [`NE+1:0] QeM,
  output logic [`DIVb+3:0] X,
-  output logic [`DIVb-1:0] DPreproc
+  output logic [`DIVN-2:0] Dpreproc
 );
-
-  logic  [`DIVb-1:0] XPreproc;
-  logic  [`DIVb:0] SqrtX;
+  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  logic  [`NF-1:0] PreprocA, PreprocX;
+  logic  [`NF-1:0] PreprocB, PreprocY;
+  logic  [`NF+1:0] SqrtX;
  logic  [`DIVb+3:0] DivX;
-  logic  [`NE+1:0] QeE;
+  logic  [`NE+1:0] Qe;
  // Intdiv signals
-  logic  [`DIVb-1:0] IFNormLenX, IFNormLenD;
+  logic  [`DIVb-1:0] ZeroBufX, ZeroBufY;
  logic  [`XLEN-1:0] PosA, PosB;
-  logic  Bs, CalcOTFCSwap, ALTBE;
+  logic  Bs, OTFCSwapTemp, ALTBE;
  logic  [`XLEN-1:0] A64, B64;
  logic  [`DIVBLEN:0] Calcn, Calcm;
  logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
-  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
+  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
  logic  [`LOGRK-1:0] pPrTrunc;
  logic  [`DIVb+3:0] PreShiftX;

@@ -71,21 +72,21 @@ module fdivsqrtpreproc (
  assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
  assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;

-  assign CalcOTFCSwap = (As ^ Bs) & MDUE;
+  assign OTFCSwapTemp = (As ^ Bs) & MDUE;
  
  assign PosA = As ? -A64 : A64;
  assign PosB = Bs ? -B64 : B64;
  assign BZero = |ForwardedSrcBE;

-  assign IFNormLenX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
-  assign IFNormLenD = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
-  lzc #(`DIVb) lzcX (IFNormLenX, ell);
-  lzc #(`DIVb) lzcY (IFNormLenD, Calcm);
+  assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
+  assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
+  lzc #(`DIVb) lzcX (ZeroBufX, L);
+  lzc #(`DIVb) lzcY (ZeroBufY, Calcm);

-  assign XPreproc = IFNormLenX << ell;
-  assign DPreproc = IFNormLenD << Calcm;
+  assign PreprocX = Xm[`NF-1:0]<<L;
+  assign PreprocY = Ym[`NF-1:0]<<Calcm;

-  assign ZeroDiff = Calcm - ell;
+  assign ZeroDiff = Calcm - L;
  assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B
  assign p = ALTBE ? '0 : ZeroDiff;

@@ -96,13 +97,14 @@ module fdivsqrtpreproc (
  assign IntBits = (`DIVBLEN)'(`RK) + p;
  assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};

-  assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
-  assign DivX = {3'b000, ~XZero, XPreproc};
+  assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
+  assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};

  // *** explain why X is shifted between radices (initial assignment of WS=RX)
-  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
-  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
+  if (`RADIX == 2)  assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
+  else              assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
  assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
+  assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};

  //           radix 2     radix 4
  // 1 copies  DIVLEN+2    DIVLEN+2/2
@@ -114,12 +116,12 @@ module fdivsqrtpreproc (
  // r = 1 or 2
  // DIVRESLEN/(r*`DIVCOPIES)

-  flopen #(`NE+2)    expreg(clk, IFDivStartE, QeE, QeM);
-  flopen #(1)       swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap);
+  flopen #(`NE+2)    expreg(clk, IFDivStartE, Qe, QeM);
+  flopen #(1)       swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
  flopen #(1)       altbreg(clk, IFDivStartE, ALTBE, ALTBM);
  flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n);
  flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m);
-  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE));
+  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);

 endmodule

@@ -128,7 +130,7 @@ module expcalc(
  input  logic [`NE-1:0] Xe, Ye,
  input  logic Sqrt,
  input  logic XZero, 
-  input  logic [`DIVBLEN:0] ell, m,
+  input  logic [`DIVBLEN:0] L, m,
  output logic [`NE+1:0] Qe
  );
  logic [`NE-2:0] Bias;
@@ -160,10 +162,10 @@ module expcalc(
            2'h2: Bias =  (`NE-1)'(`H_BIAS);
        endcase
  end
-  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
+  assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
  assign SExp  = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
  // correct exponent for denormalized input's normalization shifts
-  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
+  assign DExp  = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
  
  assign Qe = Sqrt ? SExp : DExp;
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@@ -32,7 +32,7 @@

 /* verilator lint_off UNOPTFLAT */
 module fdivsqrtstage2 (
-  input  logic [`DIVb-1:0] D,
+  input  logic [`DIVN-2:0] D,
  input  logic [`DIVb+3:0]  DBar, 
  input  logic [`DIVb:0] U, UM,
  input  logic [`DIVb+3:0]  WS, WC,
@@ -69,7 +69,7 @@ module fdivsqrtstage2 (
  always_comb
    if      (up) Dsel = DBar;
    else if (uz) Dsel = '0; // qz
-    else         Dsel = {3'b000, 1'b1, D}; // un
+    else         Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un

  // Partial Product Generation
  //  WSA, WCA = WS + WC - qD
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -31,7 +31,7 @@
 `include "wally-config.vh"

 module fdivsqrtstage4 (
-  input  logic [`DIVb-1:0] D,
+  input  logic [`DIVN-2:0] D,
  input  logic [`DIVb+3:0]  DBar, D2, DBar2,
  input  logic [`DIVb:0] U, UM,
  input  logic [`DIVb+3:0]  WS, WC,
@@ -61,7 +61,7 @@ module fdivsqrtstage4 (
 	// 0010 = -1
 	// 0001 = -2
  assign Smsbs = U[`DIVb:`DIVb-4];
-  assign Dmsbs = D[`DIVb-1:`DIVb-3];
+  assign Dmsbs = D[`DIVN-2:`DIVN-4];
  assign WCmsbs = WC[`DIVb+3:`DIVb-4];
  assign WSmsbs = WS[`DIVb+3:`DIVb-4];

@@ -77,7 +77,7 @@ module fdivsqrtstage4 (
      4'b1000: Dsel = DBar2;
      4'b0100: Dsel = DBar;
      4'b0000: Dsel = '0;
-      4'b0010: Dsel = {3'b0, 1'b1, D};
+      4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
      4'b0001: Dsel = D2;
      default: Dsel = 'x;
    endcase
--- a/pipelined/testbench/fp/vectors/f16_mul_rd.tv
+++ b/pipelined/testbench/fp/vectors/f16_mul_rd.tv
--- a/pipelined/testbench/fp/vectors/f16_mul_rne.tv
+++ b/pipelined/testbench/fp/vectors/f16_mul_rne.tv
--- a/pipelined/testbench/fp/vectors/f16_mul_ru.tv
+++ b/pipelined/testbench/fp/vectors/f16_mul_ru.tv
--- a/pipelined/testbench/fp/vectors/f16_mul_rz.tv
+++ b/pipelined/testbench/fp/vectors/f16_mul_rz.tv