Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2022-09-21 12:20:12 -05:00 · 2022-09-21 12:20:12 -05:00 · 832658838d
commit 832658838d
parent ac864a6ca3 129b9343fe
11 changed files with 38 additions and 28 deletions
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -102,7 +102,7 @@
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))

 // division constants
-`define RADIX 32'h2
+`define RADIX 32'h4
 `define DIVCOPIES 32'h3
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
 // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@ -61,6 +61,7 @@ module fdivsqrt(
  logic [`DIVb+1:0] FirstC;
  logic Firstun;
  logic WZero;
+  logic SpecialCaseM;

  fdivsqrtpreproc fdivsqrtpreproc(
    .clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
@ -69,11 +70,11 @@ module fdivsqrt(
    .clk, .reset, .FmtE, .XsE, .SqrtE, 
    .DivBusy, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, 
    .XNaNE, .YNaNE,
-    .XInfE, .YInfE, .WZero);
+    .XInfE, .YInfE, .WZero, .SpecialCaseM);
  fdivsqrtiter fdivsqrtiter(
    .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, 
    .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, 
    .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
    .DivBusy);
-  fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .QmM, .WZero, .DivSM);
+  fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM);
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfgen4.sv
@ -31,7 +31,7 @@
 `include "wally-config.vh"

 module fdivsqrtfgen4 (
-  input  logic [3:0] u,
+  input  logic [3:0] udigit,
  input  logic [`DIVb+3:0] C, U, UM,
  output logic [`DIVb+3:0] F
 );
@ -47,9 +47,9 @@ module fdivsqrtfgen4 (
  // Choose which adder input will be used

  always_comb
-    if (u[3])       F = F2;
-    else if (u[2])  F = F1;
-    else if (U[1])  F = FN1;
-    else if (u[0])  F = FN2;
+    if (udigit[3])       F = F2;
+    else if (udigit[2])  F = F1;
+    else if (udigit[1])  F = FN1;
+    else if (udigit[0])  F = FN2;
    else            F = F0;
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@ -44,18 +44,20 @@ module fdivsqrtfsm(
  input  logic StallM,
  input logic WZero,
  output logic DivDone,
-  output logic DivBusy
+  output logic DivBusy,
+  output logic SpecialCaseM
 );
  
  typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
  statetype state;

  logic [`DURLEN-1:0] step;
-  logic SpecialCase;
  logic [`DURLEN-1:0] cycles;
+  logic SpecialCaseE;

  // terminate immediately on special cases
-  assign SpecialCase = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
+  assign SpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
+  flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc

 // DIVN = `NF+3
 // NS = NF + 1
@ -103,7 +105,7 @@ module fdivsqrtfsm(
          step <= cycles; // *** this should be adjusted to depend on the precision; sqrt should use one fewer step because firststep=1
 //          $display("Setting Nf = %d fbits %d cycles = %d FmtE %d FPSIZES = %d Q_NF = %d num = %d denom = %d\n", Nf, fbits, cycles, FmtE, `FPSIZES, `Q_NF,
 //          (fbits +(`LOGR*`DIVCOPIES)-1), (`LOGR*`DIVCOPIES));
-          if (SpecialCase) state <= #1 DONE;
+          if (SpecialCaseE) state <= #1 DONE;
          else             state <= #1 BUSY;
      end else if (DivDone) begin
        if (StallM) state <= #1 DONE;
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@ -37,6 +37,7 @@ module fdivsqrtpostproc(
  input logic [`DIVb+1:0] FirstC,
  input logic  Firstun,
  input logic SqrtM,
+  input logic SpecialCaseM,
  output logic [`DIVb:0] QmM, 
  output logic WZero,
  output logic DivSM
@ -64,7 +65,7 @@ module fdivsqrtpostproc(
  end else begin
    assign WZero = weq0;
  end 
-  assign DivSM = ~WZero;
+  assign DivSM = ~WZero & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide

  // Determine if sticky bit is negative
  assign W = WC+WS;
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@ -61,6 +61,8 @@ module fdivsqrtpreproc (

  assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
  assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
+
+  // *** explain why X is shifted between radices
  if (`RADIX == 2)  assign X = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
  else              assign X = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
  assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv
@ -35,7 +35,7 @@ module fdivsqrtqsel4 (
  input logic [4:0] Smsbs,
  input logic [`DIVb+3:0] WS, WC,
  input logic Sqrt, j1,
-  output logic [3:0] u
+  output logic [3:0] udigit
 );
 	logic [6:0] Wmsbs;
 	logic [7:0] PreWmsbs;
@ -107,6 +107,6 @@ module fdivsqrtqsel4 (
      else if (Smsbs == 5'b10000) A = 3'b111;
      else A = Smsbs[2:0];
    end else A = Dmsbs;
-	assign u = USel4[{A,Wmsbs}];
+	assign udigit = USel4[{A,Wmsbs}];
 	
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@ -46,7 +46,7 @@ module fdivsqrtstage4 (
 /* verilator lint_on UNOPTFLAT */

  logic [`DIVb+3:0]  Dsel;
-  logic [3:0]     u;
+  logic [3:0]     udigit;
  logic [`DIVb+3:0] F;
  logic [`DIVb+3:0] AddIn;
  logic [4:0] Smsbs;
@ -61,11 +61,11 @@ module fdivsqrtstage4 (
 	// 0010 = -1
 	// 0001 = -2
  assign Smsbs = U[`DIVb:`DIVb-4];
-  fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .u);
-  fdivsqrtfgen4 fgen4(.u, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
+  fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit);
+  fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);

  always_comb
-  case (u)
+  case (udigit)
    4'b1000: Dsel = DBar2;
    4'b0100: Dsel = DBar;
    4'b0000: Dsel = '0;
@ -77,10 +77,10 @@ module fdivsqrtstage4 (
  // Partial Product Generation
  //  WSA, WCA = WS + WC - qD
  assign AddIn = SqrtM ? F : Dsel;
-  assign CarryIn = ~SqrtM & (u[3] | u[2]); // +1 for 2's complement of -D and -2D 
+  assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D 
  csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
 
-  fdivsqrtuotfc4 fdivsqrtuotfc4(.u, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
+  fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);

  assign un = 0; // unused for radix 4
 endmodule
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv
@ -31,7 +31,7 @@
 `include "wally-config.vh"

 module fdivsqrtuotfc4(
-  input  logic [3:0]   u,
+  input  logic [3:0]   udigit,
  input  logic         Sqrt,
  input  logic [`DIVb:0] U, UM,
  input  logic [`DIVb:0] C,
@ -47,19 +47,19 @@ module fdivsqrtuotfc4(
  assign K3 = (C & ~(C << 2));      // 3K

  always_comb begin
-    if (u[3]) begin
+    if (udigit[3]) begin
      UNext  = U | K2;
      UMNext = U | K1;
-    end else if (u[2]) begin
+    end else if (udigit[2]) begin
      UNext  = U | K1;
      UMNext = U;
-    end else if (u[1]) begin
+    end else if (udigit[1]) begin
      UNext  = UM | K3;
      UMNext = UM | K2;
-    end else if (u[0]) begin
+    end else if (udigit[0]) begin
      UNext  = UM | K2;
      UMNext = UM | K1;
-    end else begin        // digit = 0
+    end else begin        // udigit = 0
      UNext  = U;
      UMNext = UM | K3;
    end 
--- a/pipelined/src/fpu/postproc/divshiftcalc.sv
+++ b/pipelined/src/fpu/postproc/divshiftcalc.sv
@ -73,8 +73,10 @@ module divshiftcalc(
    assign DivDenormShiftAmt = DivDenormShiftPos ? DivDenormShift[`LOGNORMSHIFTSZ-1:0] : '0;
    assign DivShiftAmt = DivResDenorm ? DivDenormShiftAmt : NormShift;

+    // *** explain why radix 4 division needs a left shift by 1
+    // *** can this shift be moved into the shiftcorrection logic?
    if (`RADIX == 4)
-        assign DivShiftIn = {{`NF{1'b0}}, DivQm[`DIVb-1:0], {`NORMSHIFTSZ-`DIVb+2-`NF{1'b0}}};
+        assign DivShiftIn = Sqrt ? {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1-`NF{1'b0}}} : {{`NF{1'b0}}, DivQm[`DIVb-1:0], {`NORMSHIFTSZ-`DIVb+2-`NF{1'b0}}};
    else
        assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb+1-`NF{1'b0}}};
 endmodule
--- a/pipelined/src/fpu/postproc/flags.sv
+++ b/pipelined/src/fpu/postproc/flags.sv
@ -128,10 +128,12 @@ module flags(
    //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
    //                  |                    |                    |                                      |                     |               |
    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid);
+   //assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&G)))&(R|S|G))&~(InfIn|NaNIn|DivByZero|Invalid|XZero);

    // Set Inexact flag if the res is different from what would be output given infinite precision
    //      - Don't set the underflow flag if an underflowed res isn't output
    assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid);
+    //assign FpInexact = (S|G|Overflow|R)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);

    //                  if the res is too small to be represented and not 0
    //                  |                                     and if the res is not invalid (outside the integer bounds)