Merge pull request #93 from davidharrishmc/dev

NaN handling
2025-02-11 06:05:49 +00:00 · 2023-02-18 11:40:19 -06:00 · 2023-02-18 11:40:19 -06:00 · e0d5593792
commit e0d5593792
parent 1448ef1c6d 92d4acf118
5 changed files with 69 additions and 34 deletions
--- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@ -49,7 +49,7 @@ module fdivsqrtpostproc(
  logic [`DIVb+3:0] W, Sum, DM;
  logic [`DIVb:0] PreQmM;
  logic NegStickyM;
-  logic weq0E, weq0M, WZeroM;
+  logic weq0E, WZeroM;
  logic [`XLEN-1:0] IntDivResultM;

  //////////////////////////
@ -81,7 +81,6 @@ module fdivsqrtpostproc(
  //////////////////////////
 
  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
-  flopenr #(1) WeqZeroMReg(clk, reset, ~StallM, weq0E, weq0M);

  //////////////////////////
  // Memory Stage: Postprocessing
--- a/src/fpu/fpu.sv
+++ b/src/fpu/fpu.sv
@ -110,6 +110,7 @@ module fpu (
  logic 		        XInfE, YInfE, ZInfE;                  // is the input infinity - execute stage
  logic 		        XInfM, YInfM, ZInfM;                  // is the input infinity - memory stage
  logic 		        XExpMaxE;                             // is the exponent all ones (max value)
+  logic [`FLEN-1:0] XPostBoxE;                            // X after fixing bad NaN box.  Needed for 1-input operations

  // Fma Signals
  logic             FmaAddSubE;                           // Multiply by 1.0 when adding or subtracting
@ -200,23 +201,20 @@ module fpu (
  mux3  #(`FLEN)  fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);

  // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
-  generate
-    if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
-    else if(`FPSIZES == 2) 
-        mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
-    else if(`FPSIZES == 3 | `FPSIZES == 4) 
-        mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, 
-                            {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, 
-                            {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, 
-                            {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
-  endgenerate
+  if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
+  else if(`FPSIZES == 2) 
+      mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
+  else if(`FPSIZES == 3 | `FPSIZES == 4) 
+      mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, 
+                          {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, 
+                          {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, 
+                          {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes
  assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
  mux2  #(`FLEN)  fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
  
  // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
  // For add and subtract, Z comes from second source operand
-  generate
-  if(`FPSIZES == 1) assign BoxedZeroE = 0;
+ if(`FPSIZES == 1) assign BoxedZeroE = 0;
  else if(`FPSIZES == 2) 
    mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
  else if(`FPSIZES == 3 | `FPSIZES == 4)
@ -224,7 +222,6 @@ module fpu (
                                {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, 
                                {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, 
                                (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
-  endgenerate
  assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
  mux3  #(`FLEN)  fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);

@ -234,7 +231,7 @@ module fpu (
    .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), 
    .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), 
    .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), 
-    .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
+    .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
  
  // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
  fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
@ -255,7 +252,7 @@ module fpu (
    .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));

  // sign injection: fsgnj/fsgnjx/fsgnjn
-  fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
+  fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XPostBoxE), .Fmt(FmtE), .SgnRes(SgnResE));

  // classify: fclass
  fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), 
@ -268,7 +265,6 @@ module fpu (


  // NaN Box SrcA to convert integer to requested FP size
-  generate
  if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
  else if(`FPSIZES == 2) 
    mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
@ -277,14 +273,12 @@ module fpu (
                            {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, 
                            {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, 
                            {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
-  endgenerate

  // select a result that may be written to the FP register
  mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
  assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);

  // select the result that may be written to the integer register - to IEU
-  generate
  if(`FPSIZES == 1)
    assign SgnExtXE = XE;
  else if(`FPSIZES == 2) 
@ -294,7 +288,7 @@ module fpu (
                                {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, 
                                {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, 
                                XE, FmtE, SgnExtXE); 
-  endgenerate
+
  if (`FLEN>`XLEN)
    assign IntSrcXE = SgnExtXE[`XLEN-1:0];
  else 
--- a/src/fpu/unpack.sv
+++ b/src/fpu/unpack.sv
@ -39,7 +39,8 @@ module unpack (
  output logic                    XSubnorm,             // is X subnormal
  output logic                    XZero, YZero, ZZero,  // is XYZ zero
  output logic                    XInf, YInf, ZInf,     // is XYZ infinity
-  output logic                    XExpMax               // does X have the maximum exponent (NaN or Inf)
+  output logic                    XExpMax,              // does X have the maximum exponent (NaN or Inf)
+  output logic [`FLEN-1:0]        XPostBox              // X after being properly NaN-boxed
 );

  logic XExpNonZero, YExpNonZero, ZExpNonZero;          // is the exponent of XYZ non-zero
@ -48,14 +49,17 @@ module unpack (
  
  unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
                          .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
-                          .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), .Subnorm(XSubnorm));
+                          .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), 
+                          .Subnorm(XSubnorm), .PostBox(XPostBox));

  unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
                          .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
-                          .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), .Subnorm());
+                          .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero), 
+                          .Subnorm(), .PostBox());

  unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
                          .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
-                          .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), .Subnorm());
+                          .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), 
+                          .Subnorm(), .PostBox());
 
 endmodule
--- a/src/fpu/unpackinput.sv
+++ b/src/fpu/unpackinput.sv
@ -40,13 +40,14 @@ module unpackinput (
  output logic                    Inf,        // is the number infinity
  output logic                    ExpNonZero, // is the exponent not zero
  output logic                    FracZero,   // is the fraction zero
-  output logic                    ExpMax,      // does In have the maximum exponent (NaN or Inf)
-  output logic                    Subnorm     // is the number subnormal
+  output logic                    ExpMax,     // does In have the maximum exponent (NaN or Inf)
+  output logic                    Subnorm,    // is the number subnormal
+  output logic [`FLEN-1:0]        PostBox     // Number reboxed correctly as a NaN
 );

  logic [`NF-1:0] Frac;       // Fraction of XYZ
-  logic           BadNaNBox;  // is the NaN boxing bad
-  
+  logic           BadNaNBox;  // incorrectly NaN Boxed
+
  if (`FPSIZES == 1) begin        // if there is only one floating point format supported
      assign BadNaNBox = 0;
      assign Sgn = In[`FLEN-1];  // sign bit
@ -54,6 +55,7 @@ module unpackinput (
      assign ExpNonZero = |In[`FLEN-2:`NF];  // is the exponent non-zero
      assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};  // exponent.  subnormal numbers have effective biased exponent of 1
      assign ExpMax = &In[`FLEN-2:`NF];  // is the exponent all 1's
+      assign PostBox = In;
  
  end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
      // largest format | smaller format
@ -75,9 +77,15 @@ module unpackinput (
      //      double and half

      assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
+      always_comb
+        if (BadNaNBox) begin
+//          PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
+          PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
+        end else 
+          PostBox = In;

      // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-      assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
+      assign Sgn = Fmt ? In[`FLEN-1] : (BadNaNBox ? 0 : In[`LEN1-1]); // improperly boxed NaNs are treated as positive

      // extract the fraction, add trailing zeroes to the mantissa if nessisary
      assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
@ -128,8 +136,23 @@ module unpackinput (
              default: BadNaNBox = 1'bx;
          endcase

+      always_comb
+        if (BadNaNBox) begin
+          case (Fmt)
+            `FMT: PostBox = In;
+//            `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, In[`LEN1-`NE1-3:0]};
+//            `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, In[`LEN2-`NE2-3:0]};
+            `FMT1: PostBox = {{(`FLEN-`LEN1){1'b1}}, 1'b1, {(`NE1+1){1'b1}}, {(`LEN1-`NE1-2){1'b0}}};
+            `FMT2: PostBox = {{(`FLEN-`LEN2){1'b1}}, 1'b1, {(`NE2+1){1'b1}}, {(`LEN2-`NE2-2){1'b0}}};
+            default: PostBox = 'x;
+          endcase
+        end else 
+          PostBox = In;
+
      // extract the sign bit
      always_comb
+        if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
+        else
          case (Fmt)
              `FMT:  Sgn = In[`FLEN-1];
              `FMT1: Sgn = In[`LEN1-1];
@ -137,7 +160,7 @@ module unpackinput (
              default: Sgn = 1'bx;
          endcase

-      // extract the fraction
+       // extract the fraction
      always_comb
          case (Fmt)
              `FMT: Frac = In[`NF-1:0];
@ -200,8 +223,24 @@ module unpackinput (
              2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
          endcase

+      always_comb
+        if (BadNaNBox) begin
+          case (Fmt)
+            2'b11: PostBox = In;
+//            2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, In[`D_LEN-`D_NE-3:0]};
+//            2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, In[`S_LEN-`S_NE-3:0]};
+//            2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, In[`H_LEN-`H_NE-3:0]};
+            2'b01: PostBox = {{(`Q_LEN-`D_LEN){1'b1}}, 1'b1, {(`D_NE+1){1'b1}}, {(`D_LEN-`D_NE-2){1'b0}}};
+            2'b00: PostBox = {{(`Q_LEN-`S_LEN){1'b1}}, 1'b1, {(`S_NE+1){1'b1}}, {(`S_LEN-`S_NE-2){1'b0}}};
+            2'b10: PostBox = {{(`Q_LEN-`H_LEN){1'b1}}, 1'b1, {(`H_NE+1){1'b1}}, {(`H_LEN-`H_NE-2){1'b0}}};
+          endcase
+        end else 
+          PostBox = In;
+
      // extract sign bit
      always_comb
+        if (BadNaNBox) Sgn = 0; // improperly boxed NaNs are treated as positive
+        else
          case (Fmt)
              2'b11: Sgn = In[`Q_LEN-1];
              2'b01: Sgn = In[`D_LEN-1];
--- a/src/ifu/bpred/bpred.sv
+++ b/src/ifu/bpred/bpred.sv
@ -72,7 +72,7 @@ module bpred (
  logic                     PredValidF;
  logic [1:0]               DirPredictionF;

-  logic [3:0]               BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE;
+  logic [3:0]               BTBPredInstrClassF, PredInstrClassF, PredInstrClassD;
  logic [`XLEN-1:0]         PredPCF, RASPCF;
  logic                     PredictionPCWrongE;
  logic                     AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
@ -207,8 +207,7 @@ module bpred (
  // pipeline the class
  flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD);
  flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE);
-  flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE);
-
+ 
  // Check the prediction
  // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
  // if the class prediction is wrong a regular instruction may have been predicted as a taken branch