Parameterized the FMA signal widths in terms of FMALEN. However, some offsets are still not parameterized. See PR #793 for the list of changes.

2025-02-02 17:55:19 +00:00 · 2024-05-13 15:16:00 -07:00 · 2024-05-13 15:16:00 -07:00 · 175c18da01
commit 175c18da01
parent 2dfada0687
10 changed files with 100 additions and 104 deletions
--- a/config/shared/config-shared.vh
+++ b/config/shared/config-shared.vh
@ -128,12 +128,10 @@ localparam FMALEN = 3*NF + 6;
 //     because NORMSHIFTSZ becomes limited by convert rather than divider
 //     The two extra bits are necessary because shiftcorrection dropped them for fcvt.
 //     May be possible to remove these two bits by modifying shiftcorrection
-localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2));
-//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2));
+//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (FMALEN + 2));
+localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN + 2));

 localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));                  // log_2(NORMSHIFTSZ)
-localparam CORRSHIFTSZ = NORMSHIFTSZ-2;                             // Drop leading 2 integer bits
-

 // Disable spurious Verilator warnings

--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@ -196,7 +196,6 @@ localparam cvw_t P = '{
  FMALEN : FMALEN,
  NORMSHIFTSZ : NORMSHIFTSZ,
  LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
-  CORRSHIFTSZ : CORRSHIFTSZ,
  LOGR        : LOGR,
  RK          : RK,
  FPDUR       : FPDUR,
--- a/src/cvw.sv
+++ b/src/cvw.sv
@ -288,7 +288,6 @@ typedef struct packed {
  int NORMSHIFTSZ;
  int LOGNORMSHIFTSZ;
  int FMALEN;
-  int CORRSHIFTSZ;

 // division constants
  int LOGR       ;
--- a/src/fpu/fma/fmaalign.sv
+++ b/src/fpu/fma/fmaalign.sv
@ -31,14 +31,14 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
  input  logic [P.NE-1:0]      Xe, Ye, Ze,          // biased exponents in B(NE.0) format
  input  logic [P.NF:0]        Zm,                  // significand in U(0.NF) format]
  input  logic                 XZero, YZero, ZZero, // is the input zero
-  output logic [3*P.NF+5:0]    Am,                  // addend aligned for addition in U(NF+5.2NF+1)
+  output logic [P.FMALEN-1:0]  Am,                  // addend aligned for addition in U(NF+5.2NF+1)
  output logic                 ASticky,             // Sticky bit calculated from the aliged addend
  output logic                 KillProd             // should the product be set to zero
 );

  logic [P.NE+1:0]             ACnt;                // how far to shift the addend to align with the product in Q(NE+2.0) format
-  logic [4*P.NF+5:0]           ZmShifted;           // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
-  logic [4*P.NF+5:0]           ZmPreshifted;        // input to the alignment shifter U(NF+5.3NF+1)
+  logic [P.FMALEN+P.NF-1:0]    ZmShifted;           // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
+  logic [P.FMALEN+P.NF-1:0]    ZmPreshifted;        // input to the alignment shifter U(NF+5.3NF+1)
  logic                        KillZ;               // should the addend be killed

  ///////////////////////////////////////////////////////////////////////////////
@ -56,7 +56,7 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
  //  |   54'b0    |  106'b(product)  | 2'b0 |
  //  | addend     |

-  assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)};
+  assign ZmPreshifted = {Zm,(P.FMALEN-1)'(0)};
  assign KillProd     = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
  assign KillZ        = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5));

@ -87,6 +87,6 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
    end
  end

-  assign Am = ZmShifted[4*P.NF+5:P.NF];
+  assign Am = ZmShifted[P.FMALEN+P.NF-1:P.NF];

 endmodule
--- a/src/fpu/fpu.sv
+++ b/src/fpu/fpu.sv
@ -119,14 +119,14 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
  // Fma Signals
  logic                        FmaAddSubE;                         // Multiply by 1.0 when adding or subtracting
  logic [1:0]                  FmaZSelE;                           // Select Z = Y when adding or subtracting, 0 when multiplying
-  logic [3*P.NF+5:0]           SmE, SmM;                           // Sum significand
+  logic [P.FMALEN-1:0]         SmE, SmM;                           // Sum significand
  logic                        FmaAStickyE, FmaAStickyM;           // FMA addend sticky bit output
  logic [P.NE+1:0]             SeE,SeM;                            // Sum exponent
  logic                        InvAE, InvAM;                       // Invert addend
  logic                        AsE, AsM;                           // Addend sign
  logic                        PsE, PsM;                           // Product sign
  logic                        SsE, SsM;                           // Sum sign
-  logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM;                       // LZA sum leading zero count
+  logic [$clog2(P.FMALEN+1)-1:0] SCntE, SCntM;                       // LZA sum leading zero count
  
  // Cvt Signals
  logic [P.NE:0]               CeE, CeM;                           // convert intermediate expoent
@ -358,8 +358,8 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
    {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
    {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
  flopenrc #(2)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM});      
-  flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
-  flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
+  flopenrc #(P.FMALEN) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
+  flopenrc #($clog2(P.FMALEN+1)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
    {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
    {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
  flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@ -30,13 +30,13 @@
 module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0]         Fmt,                 // precision 1 = double 0 = single
  input  logic [P.NE+1:0]              FmaSe,               // sum's exponent
-  input  logic [3*P.NF+5:0]            FmaSm,               // the positive sum
-  input  logic [$clog2(3*P.NF+7)-1:0]  FmaSCnt,             // normalization shift count
+  input  logic [P.FMALEN-1:0]          FmaSm,               // the positive sum
+  input  logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt,             // normalization shift count
  output logic [P.NE+1:0]              NormSumExp,          // exponent of the normalized sum not taking into account Subnormal or zero results
-  output logic                         FmaSZero,            // is the result subnormal - calculated before LZA corection
+  output logic                         FmaSZero,            //  is the sum zero
  output logic                         FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
-  output logic [$clog2(3*P.NF+7)-1:0]  FmaShiftAmt,         // normalization shift count
-  output logic [3*P.NF+7:0]            FmaShiftIn           // is the sum zero
+  output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt,         // normalization shift count
+  output logic [P.FMALEN+1:0]          FmaShiftIn           
 );
  logic [P.NE+1:0]                     PreNormSumExp;       // the exponent of the normalized sum with the P.FLEN bias
  logic [P.NE+1:0]                     BiasCorr;            // correction for bias
@ -49,7 +49,7 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
  assign FmaSZero = ~(|FmaSm);

  // calculate the sum's exponent FmaSe-FmaSCnt+NF+2
-  assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);
+  assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(P.FMALEN+1)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);

  //convert the sum's exponent into the proper precision
  if (P.FPSIZES == 1) begin
@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
  // set and calculate the shift input and amount
  //  - shift once if killing a product and the result is subnormal
  assign FmaShiftIn = {2'b0, FmaSm};
-  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3): FmaSCnt+1;
-  else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
+  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1;
+  else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
 endmodule
--- a/src/fpu/postproc/postprocess.sv
+++ b/src/fpu/postproc/postprocess.sv
@ -44,9 +44,9 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
  input logic                              FmaPs,               // the product's sign
  input logic                              FmaSs,               // Sum sign
  input logic  [P.NE+1:0]                  FmaSe,               // the sum's exponent
-  input logic  [3*P.NF+5:0]                FmaSm,               // the positive sum
+  input logic  [P.FMALEN-1:0]                FmaSm,               // the positive sum
  input logic                              FmaASticky,          // sticky bit that is calculated during alignment
-  input logic  [$clog2(3*P.NF+7)-1:0]      FmaSCnt,             // the normalization shift count
+  input logic  [$clog2(P.FMALEN+1)-1:0]      FmaSCnt,             // the normalization shift count
  //divide signals
  input logic                              DivSticky,           // divider sticky bit
  input logic  [P.NE+1:0]                  DivUe,               // divsqrt exponent
@ -70,8 +70,8 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
  logic                        Rs;                   // result sign
  logic [P.NF-1:0]             Rf;                   // Result fraction
  logic [P.NE-1:0]             Re;                   // Result exponent
-  logic                        Ms;                   // norMalized sign
-  logic [P.CORRSHIFTSZ-1:0]    Mf;                   // norMalized fraction
+  logic                        Ms;                   // normalized sign
+  logic [P.NORMSHIFTSZ-1:0]    Mf;                   // normalized fraction
  logic [P.NE+1:0]             Me;                   // normalized exponent
  logic [P.NE+1:0]             FullRe;               // Re with bits to determine sign and overflow
  logic                        UfPlus1;              // do you add one (for determining underflow flag)
@ -86,10 +86,10 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
  // fma signals
  logic [P.NE+1:0]             FmaMe;                // exponent of the normalized sum
  logic                        FmaSZero;             // is the sum zero
-  logic [3*P.NF+7:0]           FmaShiftIn;           // fma shift input
+  logic [P.FMALEN+1:0]         FmaShiftIn;           // fma shift input
  logic [P.NE+1:0]             NormSumExp;           // exponent of the normalized sum not taking into account Subnormal or zero results
  logic                        FmaPreResultSubnorm;  // is the result subnormal - calculated before LZA corection
-  logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;          // normalization shift amount for fma
+  logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt;          // normalization shift amount for fma
  // division signals
  logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt;          // divsqrt shif amount
  logic [P.NORMSHIFTSZ-1:0]    DivShiftIn;           // divsqrt shift input
@ -154,8 +154,8 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
  always_comb
    case(PostProcSel)
      2'b10: begin // fma
-        ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
-        ShiftIn  =  {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}};
+        ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt};
+        ShiftIn  =  {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}};
      end
      2'b00: begin // cvt
        ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};
--- a/src/fpu/postproc/round.sv
+++ b/src/fpu/postproc/round.sv
@ -32,7 +32,7 @@ module round import cvw::*;  #(parameter cvw_t P) (
  input  logic [2:0]               Frm,                // rounding mode
  input  logic [1:0]               PostProcSel,        // select the postprocessor output
  input  logic                     Ms,                 // normalized sign
-  input  logic [P.CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
+  input  logic [P.NORMSHIFTSZ-1:0] Mf,                 // normalized fraction
  // fma
  input  logic                     FmaOp,              // is an fma operation being done?
  input  logic [P.NE+1:0]          FmaMe,              // exponent of the normalized sum for fma
@ -123,61 +123,61 @@ module round import cvw::*;  #(parameter cvw_t P) (
      //      |    NF     |1|1|
      //                     ^    ^ if floating point result
      //                     ^ if not an FMA result
-      if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes)  |
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+      if (XLENPOS == 1)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes)  |
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
      //     2: NF > XLEN
-      if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&IntRes) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 2)assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&IntRes) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);

  end else if (P.FPSIZES == 2) begin
      // XLEN is either 64 or 32
      // so half and single are always smaller then XLEN

      // 1: XLEN > NF   > NF1
-      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~OutFmt) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
      // 2: NF   > XLEN > NF1
-      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | 
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~OutFmt) | 
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~OutFmt)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
      // 3: NF   > NF1  > XLEN
-      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&IntRes) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&IntRes) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~OutFmt|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);

  end else if (P.FPSIZES == 3) begin
      // 1: XLEN > NF   > NF1
-      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes) |
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&FpRes&~(OutFmt==P.FMT)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes) |
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:0]);
      // 2: NF   > XLEN > NF1
-      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | 
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.NF1-1]&FpRes&(OutFmt==P.FMT2)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.FMT)) | 
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF-1]&(IntRes|~(OutFmt==P.FMT))) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);
      // 3: NF   > NF1  > XLEN
-      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.NF2-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&(OutFmt==P.FMT2)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT2)|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF1-2:P.NORMSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT)|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.NF-2:0]);

  end else if (P.FPSIZES == 4) begin
      // Quad precision will always be greater than XLEN
      // 2: NF   > XLEN > NF1
-      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | 
-                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | 
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.D_NF-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | 
+                                                (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&~(OutFmt==P.Q_FMT)) | 
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);
      // 3: NF   > NF1  > XLEN
      // The extra XLEN bit will be ORed later when calculating the final sticky bit - the ufplus1 not needed for integer
-      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
-                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
-                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.NORMSHIFTSZ-P.H_NF-2:P.NORMSHIFTSZ-P.S_NF-1]&FpRes&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.S_NF-2:P.NORMSHIFTSZ-P.XLEN-1]&FpRes&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                                (|Mf[P.NORMSHIFTSZ-P.XLEN-2:P.NORMSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT)|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.D_NF-2:P.NORMSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT)|IntRes)) |
+                                                (|Mf[P.NORMSHIFTSZ-P.Q_NF-2:0]);

  end

@ -188,32 +188,32 @@ module round import cvw::*;  #(parameter cvw_t P) (
  // determine round and LSB of the rounded value
  //      - underflow round bit is used to determine the underflow flag
  if (P.FPSIZES == 1) begin
-      assign FpGuard  = Mf[P.CORRSHIFTSZ-P.NF-1];
-      assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
-      assign FpRound  = Mf[P.CORRSHIFTSZ-P.NF-2];
+      assign FpGuard  = Mf[P.NORMSHIFTSZ-P.NF-1];
+      assign FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
+      assign FpRound  = Mf[P.NORMSHIFTSZ-P.NF-2];

  end else if (P.FPSIZES == 2) begin
-      assign FpGuard  = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
-      assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
-      assign FpRound  = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
+      assign FpGuard  = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-1] : Mf[P.NORMSHIFTSZ-P.NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF] : Mf[P.NORMSHIFTSZ-P.NF1];
+      assign FpRound  = OutFmt ? Mf[P.NORMSHIFTSZ-P.NF-2] : Mf[P.NORMSHIFTSZ-P.NF1-2];

  end else if (P.FPSIZES == 3) begin
      always_comb
          case (OutFmt)
              P.FMT: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.NF-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.NF-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.NF-2];
              end
              P.FMT1: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.NF1-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.NF1-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.NF1-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF1];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.NF1-2];
              end
              P.FMT2: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.NF2-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.NF2-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.NF2-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.NF2];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.NF2-2];
              end
              default: begin
                  FpGuard  = 1'bx;
@ -225,31 +225,31 @@ module round import cvw::*;  #(parameter cvw_t P) (
      always_comb
          case (OutFmt)
              2'h3: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.Q_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.Q_NF];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.Q_NF-2];
              end
              2'h1: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.D_NF-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.D_NF-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.D_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.D_NF];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.D_NF-2];
              end
              2'h0: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.S_NF-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.S_NF-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.S_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.S_NF];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.S_NF-2];
              end
              2'h2: begin
-                  FpGuard  = Mf[P.CORRSHIFTSZ-P.H_NF-1];
-                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
-                  FpRound  = Mf[P.CORRSHIFTSZ-P.H_NF-2];
+                  FpGuard  = Mf[P.NORMSHIFTSZ-P.H_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZ-P.H_NF];
+                  FpRound  = Mf[P.NORMSHIFTSZ-P.H_NF-2];
              end
          endcase
  end

-  assign Guard  = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-1] : FpGuard;
-  assign LsbRes = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN] : FpLsbRes;
-  assign Round  = CvtToInt ? Mf[P.CORRSHIFTSZ-P.XLEN-2] : FpRound;
+  assign Guard  = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-1] : FpGuard;
+  assign LsbRes = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN] : FpLsbRes;
+  assign Round  = CvtToInt ? Mf[P.NORMSHIFTSZ-P.XLEN-2] : FpRound;

  always_comb begin
      // Determine if you add 1
@ -296,7 +296,7 @@ module round import cvw::*;  #(parameter cvw_t P) (
      assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};

  // trim unneeded bits from fraction
-  assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
+  assign RoundFrac = Mf[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.NF];
  
  // select the exponent
  always_comb
--- a/src/fpu/postproc/shiftcorrection.sv
+++ b/src/fpu/postproc/shiftcorrection.sv
@ -41,11 +41,11 @@ module shiftcorrection import cvw::*;  #(parameter cvw_t P) (
  input logic                      FmaSZero,
  // output
  output logic [P.NE+1:0]          FmaMe,                  // exponent of the normalized sum
-  output logic [P.CORRSHIFTSZ-1:0] Mf,                     // the shifted sum after correction
+  output logic [P.NORMSHIFTSZ-1:0] Mf,                     // the shifted sum after correction
  output logic [P.NE+1:0]          Ue                      // corrected exponent for divider
 );

-  logic [P.CORRSHIFTSZ-1:0]        CorrShifted;         // the shifted sum after LZA correction
+  logic [P.NORMSHIFTSZ-1:0]        CorrShifted;         // the shifted sum after LZA correction
  logic                            ResSubnorm;             // is the result Subnormal
  logic                            LZAPlus1;               // add one or two to the sum's exponent due to LZA correction
  logic                            LeftShiftQm;            // should the divsqrt result be shifted one to the left
@ -69,12 +69,12 @@ module shiftcorrection import cvw::*;  #(parameter cvw_t P) (
  assign RightShift = FmaOp ? LZAPlus1 : LeftShiftQm;

  // one bit right shift for FMA or division
-  mux2 #(P.NORMSHIFTSZ-2) corrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], RightShift, CorrShifted);
+  mux2 #(P.NORMSHIFTSZ) corrmux({Shifted[P.NORMSHIFTSZ-3:0], 2'b00}, {Shifted[P.NORMSHIFTSZ-2:1], 2'b00}, RightShift, CorrShifted);
  
  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
  always_comb
    if (FmaOp | (DivOp & ~DivResSubnorm))  Mf = CorrShifted;
-    else                               Mf = Shifted[P.NORMSHIFTSZ-1:2];
+    else                                   Mf = Shifted[P.NORMSHIFTSZ-1:0];
    
  // Determine sum's exponent
  //  main exponent issues: 
--- a/testbench/testbench_fp.sv
+++ b/testbench/testbench_fp.sv
@ -98,8 +98,8 @@ module testbench_fp;
   logic [P.NE+1:0] 		Se;
   logic 			ASticky;
   logic 			KillProd; 
-   logic [$clog2(3*P.NF+7)-1:0] SCnt;
-   logic [3*P.NF+5:0] 		Sm;       
+   logic [$clog2(P.FMALEN+1)-1:0] SCnt;
+   logic [P.FMALEN-1:0] 		Sm;       
   logic 			InvA;
   logic 			NegSum;
   logic 			As;