diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index ea39ca35..e5adea1f 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -97,7 +97,7 @@
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
+`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+8))
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
 
 // division constants
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 7464149f..a1c0a276 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -40,7 +40,7 @@ module fmashiftcalc(
     output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
-    output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
+    output logic [3*`NF+7:0]            FmaShiftIn        // shift input: FmaSm with two zero bits prepended
 );
     logic [`NE+1:0]             PreNormSumExp;       // the exponent of the normalized sum with the `FLEN bias
     logic [`NE+1:0] BiasCorr;
@@ -150,7 +150,7 @@ module fmashiftcalc(
 
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
-    assign FmaShiftIn = {3'b0, FmaSm};
+    assign FmaShiftIn = {2'b0, FmaSm};
     if (`FPSIZES == 1)
         assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
     else
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 4b7a1ffe..3e5485ac 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -289,18 +289,22 @@ module fpu (
    // data to be stored in memory - to IEU
    //    - FP uses NaN-blocking format
    //        - if there are any unsused bits the most significant bits are filled with 1s
-   if (`LLEN==`XLEN) begin
-      assign FWriteDataE = YE[`XLEN-1:0]; 
-   end else begin
-      logic [`FLEN-1:0] FWriteDataE;
-      if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM;
-      else assign FStore2 = FmtM&~IllegalFPUInstrM;
-
-      if (`FPSIZES==1) assign FWriteDataE = YE;
-      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
-      else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
-
-      flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
+   
+   if(`LLEN==`XLEN) // LLEN equals XLEN: drive FWriteDataE combinationally, no pipeline register here
+      assign FWriteDataE = {{`XLEN-`FLEN{1'b1}}, YE}; // prepend XLEN-FLEN one bits above YE
+   else begin // NOTE(review): FWriteDataE is not assigned in this branch - confirm it is unused or driven elsewhere
+      logic [`FLEN-1:0] WriteDataE; // local store data fed to EMWriteDataReg below
+      if(`FPSIZES == 1) assign WriteDataE = YE; // single supported size: pass YE through unchanged
+      else if(`FPSIZES == 2) assign WriteDataE = FmtE ? YE : {`FLEN/`LEN1{YE[`LEN1-1:0]}}; // FmtE set: YE as is; else low LEN1 bits replicated FLEN/LEN1 times
+      else // remaining FPSIZES values: select by the FmtE encoding
+         always_comb
+               case(FmtE) // each narrower format replicates its low bits FLEN/<len> times
+                  `Q_FMT: WriteDataE = YE;
+                  `D_FMT: WriteDataE = {`FLEN/`D_LEN{YE[`D_LEN-1:0]}};
+                  `S_FMT: WriteDataE = {`FLEN/`S_LEN{YE[`S_LEN-1:0]}};
+                  `H_FMT: WriteDataE = {`FLEN/`H_LEN{YE[`H_LEN-1:0]}};
+               endcase
+      flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, WriteDataE, FWriteDataM); // presumably an enabled/clearable flop from WriteDataE to FWriteDataM - confirm against flopenrc
    end
 
    // NaN Block SrcA
@@ -314,6 +318,7 @@ module fpu (
                              {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, 
                              {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
    endgenerate
+
    // select a result that may be written to the FP register
    mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
    assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 003c23d7..f9610144 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -96,7 +96,7 @@ module postprocess (
     // fma signals
     logic [`NE+1:0] FmaMe;     // exponent of the normalized sum
     logic FmaSZero;        // is the sum zero
-    logic [3*`NF+8:0] FmaShiftIn;        // shift input
+    logic [3*`NF+7:0] FmaShiftIn;        // shift input
     logic [`NE+1:0] NormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
@@ -160,7 +160,7 @@ module postprocess (
         case(PostProcSel)
             2'b10: begin // fma
                 ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
-                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
+                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+8){1'b0}}}; // FmaShiftIn (3*NF+8 bits) in the upper bits, zeros below up to NORMSHIFTSZ
             end
             2'b00: begin // cvt
                 ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 6329ffe2..4839ba29 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -45,15 +45,14 @@ module shiftcorrection(
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
     logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
     logic                  ResDenorm;    // is the result denormalized
-    logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
+    logic                  LZAPlus1; // add one to the sum's exponent due to LZA correction
 
     // LZA correction
-    assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
-    assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
+    assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
-    assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
+    assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-2:1] : Shifted[`NORMSHIFTSZ-3:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+    assign CorrQmShifted = (LZAPlus1|(DivQe==1&~LZAPlus1)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
     always_comb
         if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
@@ -61,11 +60,11 @@ module shiftcorrection(
         else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
+    assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // terms: +1 for LZAPlus1, +1 when pre-flagged denorm but ResDenorm is clear; forced to zero when FmaSZero or ResDenorm
     // recalculate if the result is denormalized
-    assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
+    assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
 
     // the quotent is in the range [.5,2) if there is no early termination
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
+    assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus1};
 endmodule
\ No newline at end of file