diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh
index 25d0d8c65..d991b4b32 100644
--- a/config/shared/config-shared.vh
+++ b/config/shared/config-shared.vh
@@ -118,7 +118,7 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
 // RV32F: max(32+23+1, 2(23)+4, 3(23)+6) = 3*23+6 = 75
 // RV64F: max(64+23+1, 64 + 23 + 2, 3*23+6) = 89
 // RV64D: max(84+52+1, 64+52+2, 3*52+6) = 162
-localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+6));
+localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+8));
 
 localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));                  // log_2(NORMSHIFTSZ)
 localparam CORRSHIFTSZ = NORMSHIFTSZ-2;                             // Drop leading 2 integer bits
diff --git a/src/fpu/fma/fma.sv b/src/fpu/fma/fma.sv
index bdf2898f7..3576b95df 100644
--- a/src/fpu/fma/fma.sv
+++ b/src/fpu/fma/fma.sv
@@ -34,13 +34,13 @@ module fma import cvw::*;  #(parameter cvw_t P) (
   input  logic                         XZero, YZero, ZZero,    // is the input zero
   input  logic [2:0]                   OpCtrl,                 // operation control
   output logic                         ASticky,                // sticky bit that is calculated during alignment
-  output logic [3*P.NF+3:0]            Sm,                     // the positive sum's significand
+  output logic [3*P.NF+5:0]            Sm,                     // the positive sum's significand
   output logic                         InvA,                   // Was A inverted for effective subtraction (P-A or -P+A)
   output logic                         As,                     // the aligned addend's sign (modified Z sign for other operations)
   output logic                         Ps,                     // the product's sign
   output logic                         Ss,                     // the sum's sign
   output logic [P.NE+1:0]              Se,                     // the sum's exponent
-  output logic [$clog2(3*P.NF+5)-1:0]  SCnt                    // normalization shift count
+  output logic [$clog2(3*P.NF+7)-1:0]  SCnt                    // normalization shift count
 );
 
   //  OpCtrl:
@@ -54,8 +54,8 @@ module fma import cvw::*;  #(parameter cvw_t P) (
   //        111 - sub
 
   logic [2*P.NF+1:0]   Pm;         // the product's significand in U(2.2Nf) format
-  logic [3*P.NF+3:0]   Am;         // addend aligned's mantissa for addition in U(NF+4.2NF)
-  logic [3*P.NF+3:0]   AmInv;      // aligned addend's mantissa possibly inverted
+  logic [3*P.NF+5:0]   Am;         // aligned addend's mantissa for addition in U(NF+4.2NF+2)
+  logic [3*P.NF+5:0]   AmInv;      // aligned addend's mantissa possibly inverted
   logic [2*P.NF+1:0]   PmKilled;   // the product's mantissa possibly killed U(2.2Nf)
   logic                KillProd;   // set the product to zero before addition if the product is too small to matter
   logic [P.NE+1:0]     Pe;         // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
@@ -89,6 +89,6 @@ module fma import cvw::*;  #(parameter cvw_t P) (
       
   fmaadd #(P) add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
 
-  fmalza #(3*P.NF+4, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
+  fmalza #(3*P.NF+6, P.NF) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
   
 endmodule
diff --git a/src/fpu/fma/fmaadd.sv b/src/fpu/fma/fmaadd.sv
index 00951ee10..995494f2c 100644
--- a/src/fpu/fma/fmaadd.sv
+++ b/src/fpu/fma/fmaadd.sv
@@ -28,7 +28,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 module fmaadd import cvw::*;  #(parameter cvw_t P) (
-  input  logic [3*P.NF+3:0]    Am,         // aligned addend's mantissa for addition in U(NF+5.2NF+1)
+  input  logic [3*P.NF+5:0]    Am,         // aligned addend's mantissa for addition in U(NF+4.2NF+2)
   input  logic [P.NE-1:0]      Ze,         // exponent of Z
   input  logic                 Ps,         // the product sign and the alligend addeded's sign (Modified Z sign for other operations)
   input  logic [P.NE+1:0]      Pe,         // product's exponet
@@ -36,14 +36,14 @@ module fmaadd import cvw::*;  #(parameter cvw_t P) (
   input  logic                 InvA,       // invert the aligned addend
   input  logic                 KillProd,   // should the product be set to 0
   input  logic                 ASticky,    // Alighed addend's sticky bit
-  output logic [3*P.NF+3:0]    AmInv,      // aligned addend possibly inverted
+  output logic [3*P.NF+5:0]    AmInv,      // aligned addend possibly inverted
   output logic [2*P.NF+1:0]    PmKilled,   // the product's mantissa possibly killed
   output logic                 Ss,         // sum's sign    
   output logic [P.NE+1:0]      Se,         // sum's exponent
-  output logic [3*P.NF+3:0]    Sm          // the positive sum
+  output logic [3*P.NF+5:0]    Sm          // the positive sum
 );
 
-  logic [3*P.NF+3:0]    PreSum, NegPreSum; // possibly negative sum
+  logic [3*P.NF+5:0]    PreSum, NegPreSum; // possibly negative sum
   logic                 NegSum;            // was the sum negative
 
   ///////////////////////////////////////////////////////////////////////////////
@@ -62,8 +62,8 @@ module fmaadd import cvw::*;  #(parameter cvw_t P) (
   //      addend - prod where product is killed (and not exactly zero) then don't add +1 from negation 
   //          ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
   //          in this case this result is only ever selected when InvA=1 so we can remove &InvA
-  assign {NegSum, PreSum} = {{P.NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*P.NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
-  assign NegPreSum = Am + {{P.NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*P.NF+2)'(0), ~ASticky|~KillProd, 1'b0};
+  assign {NegSum, PreSum} = {{P.NF+3{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*P.NF+5{1'b0}}, (~ASticky|KillProd)&InvA};
+  assign NegPreSum = Am + {{P.NF+2{1'b1}}, ~PmKilled, 2'b0} + {(3*P.NF+3)'(0), ~ASticky|~KillProd, 2'b0};
     
   // Choose the positive sum and accompanying LZA result.
   assign Sm = NegSum ? NegPreSum : PreSum;
diff --git a/src/fpu/fma/fmaalign.sv b/src/fpu/fma/fmaalign.sv
index c1d22ec48..4fc796fda 100644
--- a/src/fpu/fma/fmaalign.sv
+++ b/src/fpu/fma/fmaalign.sv
@@ -31,14 +31,14 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
   input  logic [P.NE-1:0]      Xe, Ye, Ze,          // biased exponents in B(NE.0) format
   input  logic [P.NF:0]        Zm,                  // significand in U(0.NF) format]
   input  logic                 XZero, YZero, ZZero, // is the input zero
-  output logic [3*P.NF+3:0]    Am,                  // addend aligned for addition in U(NF+5.2NF+1)
+  output logic [3*P.NF+5:0]    Am,                  // addend aligned for addition in U(NF+4.2NF+2)
   output logic                 ASticky,             // Sticky bit calculated from the aliged addend
   output logic                 KillProd             // should the product be set to zero
 );
 
   logic [P.NE+1:0]             ACnt;                // how far to shift the addend to align with the product in Q(NE+2.0) format
-  logic [4*P.NF+3:0]           ZmShifted;           // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
-  logic [4*P.NF+3:0]           ZmPreshifted;        // input to the alignment shifter U(NF+5.3NF+1)
+  logic [4*P.NF+5:0]           ZmShifted;           // output of the alignment shifter including sticky bits U(NF+4.3NF+2)
+  logic [4*P.NF+5:0]           ZmPreshifted;        // input to the alignment shifter U(NF+4.3NF+2)
   logic                        KillZ;               // should the addend be killed
 
   ///////////////////////////////////////////////////////////////////////////////
@@ -49,36 +49,37 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
   //      - negative means Z is larger, so shift Z left
   //      - positive means the product is larger, so shift Z right
   // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
-  assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+2) - {2'b0, Ze};
+  assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)} + (P.NE+2)'(P.NF+3) - {2'b0, Ze};
 
   // Default Addition with only inital left shift
-  //  |   53'b0    |  106'b(product)  | 1'b0 |
+  // extra bit at end and beginning so the correct guard bit is calculated when subtracting
+  //  |   54'b0    |  106'b(product)  | 2'b0 |
   //  | addnend    |
 
-  assign ZmPreshifted = {Zm,(3*P.NF+3)'(0)};
+  assign ZmPreshifted = {Zm,(3*P.NF+5)'(0)};
   assign KillProd     = (ACnt[P.NE+1]&~ZZero)|XZero|YZero;
-  assign KillZ        = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(3));
+  assign KillZ        = $signed(ACnt)>$signed((P.NE+2)'(3)*(P.NE+2)'(P.NF)+(P.NE+2)'(5));
 
   always_comb begin
     // If the product is too small to effect the sum, kill the product
-    //  |   53'b0    |  106'b(product)  | 1'b0 |
+    //  |   54'b0    |  106'b(product)  | 2'b0 |
     //  | addnend    |
     if (KillProd) begin
-        ZmShifted = {(P.NF+2)'(0), Zm, (2*P.NF+1)'(0)};
+        ZmShifted = {(P.NF+3)'(0), Zm, (2*P.NF+2)'(0)};
         ASticky   = ~(XZero|YZero);
 
     // If the addend is too small to effect the addition        
     //      - The addend has to shift two past the end of the product to be considered too small
     //      - The 2 extra bits are needed for rounding
       
-    //  |   53'b0    |  106'b(product)  | 1'b0 |
+    //  |   54'b0    |  106'b(product)  | 2'b0 |
     //  | addnend    |
     end else if (KillZ)  begin
         ZmShifted = '0;
         ASticky   = ~ZZero;
 
     // If the Addend is shifted right
-    //  |   53'b0    |  106'b(product)  | 1'b0 |
+    //  |   54'b0    |  106'b(product)  | 2'b0 |
     //  | addnend    |
     end else begin
         ZmShifted = ZmPreshifted >> ACnt;
@@ -86,6 +87,6 @@ module fmaalign import cvw::*;  #(parameter cvw_t P) (
     end
   end
 
-  assign Am = ZmShifted[4*P.NF+3:P.NF];
+  assign Am = ZmShifted[4*P.NF+5:P.NF];
 
 endmodule
diff --git a/src/fpu/fma/fmalza.sv b/src/fpu/fma/fmalza.sv
index 822f857c2..01439f4d1 100644
--- a/src/fpu/fma/fmalza.sv
+++ b/src/fpu/fma/fmalza.sv
@@ -41,7 +41,7 @@ module fmalza #(WIDTH, NF) (
   logic [WIDTH-1:0]                   P, G, K;        // propagate, generate, kill for each column
   logic [WIDTH-1:0]                   Pp1, Gm1, Km1;  // propagate shifted right by 1, generate/kill shifted left 1
 
-  assign B = {{(NF+1){1'b0}}, Pm, 1'b0};              // Zero extend product
+  assign B = {{(NF+2){1'b0}}, Pm, 2'b0};              // Zero extend product
 
   assign P = A^B;
   assign G = A&B;
diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv
index 22c650ed8..a250827bc 100755
--- a/src/fpu/fpu.sv
+++ b/src/fpu/fpu.sv
@@ -116,14 +116,14 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
   // Fma Signals
   logic                        FmaAddSubE;                         // Multiply by 1.0 when adding or subtracting
   logic [1:0]                  FmaZSelE;                           // Select Z = Y when adding or subtracting, 0 when multiplying
-  logic [3*P.NF+3:0]           SmE, SmM;                           // Sum significand
+  logic [3*P.NF+5:0]           SmE, SmM;                           // Sum significand
   logic                        FmaAStickyE, FmaAStickyM;           // FMA addend sticky bit output
   logic [P.NE+1:0]             SeE,SeM;                            // Sum exponent
   logic                        InvAE, InvAM;                       // Invert addend
   logic                        AsE, AsM;                           // Addend sign
   logic                        PsE, PsM;                           // Product sign
   logic                        SsE, SsM;                           // Sum sign
-  logic [$clog2(3*P.NF+5)-1:0] SCntE, SCntM;                       // LZA sum leading zero count
+  logic [$clog2(3*P.NF+7)-1:0] SCntE, SCntM;                       // LZA sum leading zero count
   
   // Cvt Signals
   logic [P.NE:0]               CeE, CeM;                           // convert intermediate expoent
@@ -351,8 +351,8 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
     {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
     {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
   flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
-  flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
-  flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
+  flopenrc #(3*P.NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
+  flopenrc #($clog2(3*P.NF+7)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
     {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
     {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
   flopenrc #(P.NE+P.LOGCVTLEN+P.CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv
index 5c611a7e3..5b0f1175b 100644
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@@ -30,13 +30,13 @@
 module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
   input  logic [P.FMTBITS-1:0]         Fmt,                 // precision 1 = double 0 = single
   input  logic [P.NE+1:0]              FmaSe,               // sum's exponent
-  input  logic [3*P.NF+3:0]            FmaSm,               // the positive sum
-  input  logic [$clog2(3*P.NF+5)-1:0]  FmaSCnt,             // normalization shift count
+  input  logic [3*P.NF+5:0]            FmaSm,               // the positive sum
+  input  logic [$clog2(3*P.NF+7)-1:0]  FmaSCnt,             // normalization shift count
   output logic [P.NE+1:0]              NormSumExp,          // exponent of the normalized sum not taking into account Subnormal or zero results
   output logic                         FmaSZero,            // is the result subnormal - calculated before LZA corection
   output logic                         FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
-  output logic [$clog2(3*P.NF+5)-1:0]  FmaShiftAmt,         // normalization shift count
-  output logic [3*P.NF+5:0]            FmaShiftIn           // is the sum zero
+  output logic [$clog2(3*P.NF+7)-1:0]  FmaShiftAmt,         // normalization shift count
+  output logic [3*P.NF+7:0]            FmaShiftIn           // normalization shifter input: the sum with two leading zeros prepended
 );
   logic [P.NE+1:0]                     PreNormSumExp;       // the exponent of the normalized sum with the P.FLEN bias
   logic [P.NE+1:0]                     BiasCorr;            // correction for bias
@@ -48,8 +48,8 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
   // Determine if the sum is zero
   assign FmaSZero = ~(|FmaSm);
 
-  // calculate the sum's exponent
-  assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+5)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+3);
+  // calculate the sum's exponent: FmaSe - FmaSCnt + NF + 3 (the ones-extended ~FmaSCnt contributes -FmaSCnt - 1)
+  assign PreNormSumExp = FmaSe + {{P.NE+2-$unsigned($clog2(3*P.NF+7)){1'b1}}, ~FmaSCnt} + (P.NE+2)'(P.NF+4);
 
   //convert the sum's exponent into the proper precision
   if (P.FPSIZES == 1) begin
@@ -131,6 +131,6 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
   // set and calculate the shift input and amount
   //  - shift once if killing a product and the result is subnormal
   assign FmaShiftIn = {2'b0, FmaSm};
-  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2): FmaSCnt+1;
-  else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+5)-1:0]+($clog2(3*P.NF+5))'(P.NF+2)+BiasCorr[$clog2(3*P.NF+5)-1:0]: FmaSCnt+1;
+  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+7)-1:0]+($clog2(3*P.NF+7))'(P.NF+3): FmaSCnt+1; // slice/cast widths match the FmaShiftAmt and FmaSCnt ports
+  else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*P.NF+7)-1:0]+($clog2(3*P.NF+7))'(P.NF+3)+BiasCorr[$clog2(3*P.NF+7)-1:0]: FmaSCnt+1; // as above, plus the low bits of BiasCorr
 endmodule
diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv
index 17dda38a0..aa181c5e0 100644
--- a/src/fpu/postproc/postprocess.sv
+++ b/src/fpu/postproc/postprocess.sv
@@ -44,9 +44,9 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   input logic                              FmaPs,               // the product's sign
   input logic                              FmaSs,               // Sum sign
   input logic  [P.NE+1:0]                  FmaSe,               // the sum's exponent
-  input logic  [3*P.NF+3:0]                FmaSm,               // the positive sum
+  input logic  [3*P.NF+5:0]                FmaSm,               // the positive sum
   input logic                              FmaASticky,          // sticky bit that is calculated during alignment
-  input logic  [$clog2(3*P.NF+5)-1:0]      FmaSCnt,             // the normalization shift count
+  input logic  [$clog2(3*P.NF+7)-1:0]      FmaSCnt,             // the normalization shift count
   //divide signals
   input logic                              DivSticky,           // divider sticky bit
   input logic  [P.NE+1:0]                  DivUe,               // divsqrt exponent
@@ -86,7 +86,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   // fma signals
   logic [P.NE+1:0]             FmaMe;                // exponent of the normalized sum
   logic                        FmaSZero;             // is the sum zero
-  logic [3*P.NF+5:0]           FmaShiftIn;           // fma shift input
+  logic [3*P.NF+7:0]           FmaShiftIn;           // fma shift input
   logic [P.NE+1:0]             NormSumExp;           // exponent of the normalized sum not taking into account Subnormal or zero results
   logic                        FmaPreResultSubnorm;  // is the result subnormal - calculated before LZA corection
   logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt;          // normalization shift amount for fma
@@ -155,7 +155,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
     case(PostProcSel)
       2'b10: begin // fma
         ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(3*P.NF+5){1'b0}}, FmaShiftAmt};
-        ShiftIn  =  {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+6){1'b0}}};
+        ShiftIn  =  {FmaShiftIn, {P.NORMSHIFTSZ-(3*P.NF+8){1'b0}}};
       end
       2'b00: begin // cvt
         ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};
diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv
index 7c80afc62..6b772a918 100644
--- a/testbench/testbench_fp.sv
+++ b/testbench/testbench_fp.sv
@@ -98,8 +98,8 @@ module testbench_fp;
    logic [P.NE+1:0] 		Se;
    logic 			ASticky;
    logic 			KillProd; 
-   logic [$clog2(3*P.NF+5)-1:0] SCnt;
-   logic [3*P.NF+3:0] 		Sm;       
+   logic [$clog2(3*P.NF+7)-1:0] SCnt;
+   logic [3*P.NF+5:0] 		Sm;       
    logic 			InvA;
    logic 			NegSum;
    logic 			As;
@@ -974,8 +974,8 @@ module testbench_fp;
       if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin
          errors += 1;
          $display("\nError in %s", Tests[TestNum]);
-         $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
-         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X[P.FLEN-1:0], Y[P.FLEN-1:0], Z[P.FLEN-1:0], SrcA, Res[P.FLEN-1:0], ResFlg, Ans[P.FLEN-1:0], AnsFlg);
+         $display("TestNum %d VectorNum %d OpCtrl %d", TestNum, VectorNum, OpCtrl[TestNum]);	 
+         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
          $stop;
       end