diff --git a/wally-pipelined/config/rv64icfd/wally-config.vh b/wally-pipelined/config/rv64icfd/wally-config.vh
index 136fb264c..500904b00 100644
--- a/wally-pipelined/config/rv64icfd/wally-config.vh
+++ b/wally-pipelined/config/rv64icfd/wally-config.vh
@@ -26,7 +26,7 @@
 
 // include shared configuration
 `include "wally-shared.vh"
-  // `include "../shared/wally-shared.vh"
+// `include "../shared/wally-shared.vh"
 
 `define QEMU 0
 `define BUILDROOT 0
diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
index bf09314ee..f3e48ef91 100644
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@@ -130,10 +130,21 @@ assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23
 // assign	ZNaNE = FmtE ? &Z[62:52] && |Z[51:0] : &Z[62:55] && |Z[54:32]; 
 assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0]; 
  // instantiate device under test
-fma1 UUT1(.XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}), .*);
+
+    logic [3*`NF+5:0]	SumE, SumM;       
+    logic 			    InvZE, InvZM;
+    logic 			    NegSumE, NegSumM;
+    logic 			    ZSgnEffE, ZSgnEffM;
+    logic 			    PSgnE, PSgnM;
+    logic [8:0]			NormCntE, NormCntM;
+    
+    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
+                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
+                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
+                .ProdExpE, .AddendStickyE, .KillProdE); 
 fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
               //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
-               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .AlignedAddendM(AlignedAddendE), .ProdManM(ProdManE),
+               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
                .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
 
 
diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
index 199d9bbd8..0741e9d6d 100755
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@@ -1,3 +1,3 @@
-testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
 
diff --git a/wally-pipelined/src/fpu/cvtfp.sv b/wally-pipelined/src/fpu/cvtfp.sv
index 0925e23a3..a8fd2bc4c 100644
--- a/wally-pipelined/src/fpu/cvtfp.sv
+++ b/wally-pipelined/src/fpu/cvtfp.sv
@@ -41,12 +41,10 @@ module cvtfp (
     logic [12:0] ShiftCnt;
 	logic [51:0] SFrac;
 	logic [25:0] DFrac;
-	logic [77:0] DFracTmp,tmp, tmp2;
+	logic [77:0] DFracTmp;
     //assign ShiftCnt = FmtE ? -DExpCalc&{13{Denorm}} : NormCnt;
     assign SFrac = XManE[51:0] << NormCnt;
 logic Shift;
-assign tmp = (-DExpCalc+1)&{13{Shift}};
-assign tmp2 = {XManE, 23'b0};
 assign Shift = {13{Denorm|(($signed(DExpCalc) > $signed(-25)) & DExpCalc[12])}};
 	assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
 assign DFrac = DFracTmp[76:51];
diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv
index 55fdd4fe9..466a6c40d 100644
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@@ -23,7 +23,7 @@
 ///////////////////////////////////////////
 
 `include "wally-config.vh"
- //  `include "../../../config/rv64icfd/wally-config.vh"
+//   `include "../../../config/rv64icfd/wally-config.vh"
 
 module fma(
     input logic                 clk,
@@ -58,28 +58,33 @@ module fma(
       //	{?, is mul, negate product, negate addend}
 
     // signals transfered between pipeline stages
-    logic [2*`NF+1:0]	ProdManE, ProdManM; 
-    logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;                       
+    // logic [2*`NF+1:0]	ProdManE, ProdManM; 
+    // logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;
+    logic [3*`NF+5:0]	SumE, SumM;                       
     logic [`NE+1:0]	    ProdExpE, ProdExpM;
     logic 			    AddendStickyE, AddendStickyM;
     logic 			    KillProdE, KillProdM;
+    logic 			    InvZE, InvZM;
+    logic 			    NegSumE, NegSumM;
+    logic 			    ZSgnEffE, ZSgnEffM;
+    logic 			    PSgnE, PSgnM;
+    logic [8:0]			NormCntE, NormCntM;
     
-    fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
+    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
                 .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
-                .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
+                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                 .ProdExpE, .AddendStickyE, .KillProdE); 
                 
     // E/M pipeline registers
-    flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
-    flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); 
+    // flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
+    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
     flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                            {AddendStickyE, KillProdE},
-                            {AddendStickyM, KillProdM});
+    flopenrc #(15) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE},
+                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM});
 
     fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-            .FOpCtrlM, .FrmM, .FmtM, 
-            .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, 
+            .FOpCtrlM, .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
             .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
             .FMAResM, .FMAFlgM);
 
@@ -88,7 +93,7 @@ endmodule
 
 
 module fma1(
-    // input logic        XSgnE, YSgnE, ZSgnE,
+    input logic        XSgnE, YSgnE, ZSgnE,
     input logic [`NE-1:0] XExpE, YExpE, ZExpE,      // biased exponents in B(NE.0) format
     input logic [`NF:0] XManE, YManE, ZManE,   // fractions in U(0.NF) format]
     input logic        XDenormE, YDenormE, ZDenormE, // is the input denormal
@@ -96,15 +101,24 @@ module fma1(
     input logic [`NE-1:0] BiasE,
     input logic     [2:0]       FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
     input logic                 FmtE,       // precision 1 = double 0 = single
-    output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
-    output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
+    // output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
+    // output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
     output logic    [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
     output logic                AddendStickyE,  // sticky bit that is calculated during alignment
-    output logic                KillProdE      // set the product to zero before addition if the product is too small to matter
+    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
+    output logic [3*`NF+5:0]   SumE,
+    output logic NegSumE,
+    output logic InvZE,
+    output logic ZSgnEffE,
+    output logic PSgnE,
+    output logic [8:0] NormCntE
     );
     logic [`NE-1:0] Denorm;
     logic [`NE-1:0] DenormXExp, DenormYExp;             // Denormalized input value
 
+    logic    [2*`NF+1:0]     ProdManE;   // 1.X frac * 1.Y frac in U(2.2Nf) format
+    logic    [3*`NF+5:0]     AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1)
+
     ///////////////////////////////////////////////////////////////////////////////
     // Calculate the product
     //      - When multipliying two fp numbers, add the exponents
@@ -117,8 +131,7 @@ module fma1(
     assign Denorm = FmtE ? 1 : 897;
     assign DenormXExp = XDenormE ? Denorm : XExpE;
     assign DenormYExp = YDenormE ? Denorm : YExpE;
-    assign ProdExpE = (XZeroE|YZeroE) ? 0 :
-                 DenormXExp + DenormYExp - BiasE;
+    assign ProdExpE = (DenormXExp + DenormYExp - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
 
     // Calculate the product's mantissa
     //      - Mantissa includes the assumed one. If the number is denormalized or zero, it does not have an assumed one.
@@ -189,6 +202,15 @@ module fma1(
 
     alignshift alignshift(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
                         .AlignedAddendE, .AddendStickyE, .KillProdE);
+
+                        
+    // Calculate the product's sign
+    //      Negate product's sign if FNMADD or FNMSUB
+ 
+    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
+    assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; // Swap sign of Z for subtract
+    fmaadd fmaadd(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .SumE, .NegSumE, .InvZE, .NormCntE, .XZeroE, .YZeroE);
+    
 endmodule
 
 
@@ -201,8 +223,8 @@ module fma2(
     input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic     [2:0]       FOpCtrlM,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
     input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
-    input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
+    // input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
+    // input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
     input logic     [`NE+1:0]      ProdExpM,       // X exponent + Y exponent - bias
     input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
     input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
@@ -210,6 +232,12 @@ module fma2(
     input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
     input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic                 XSNaNM, YSNaNM, ZSNaNM,    // inputs are signaling NaNs
+    input logic [3*`NF+5:0]   SumM,
+    input logic NegSumM,
+    input logic InvZM,
+    input logic ZSgnEffM,
+    input logic PSgnM,
+    input logic [8:0] NormCntM,
     output logic    [`FLEN-1:0]      FMAResM,     // FMA final result
     output logic    [4:0]       FMAFlgM);     // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
    
@@ -218,10 +246,10 @@ module fma2(
     logic [`NF-1:0]     ResultFrac; // Result fraction
     logic [`NE-1:0]     ResultExp;  // Result exponent
     logic               ResultSgn;  // Result sign
-    logic               PSgn;       // product sign
+    // logic               PSgn;       // product sign
     // logic [2*`NF+1:0]   ProdMan2;   // product being added
     // logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
-    logic [3*`NF+5:0]   Sum;        // positive sum
+    // logic [3*`NF+5:0]   Sum;        // positive sum
     // logic [3*`NF+6:0]   PreSum;     // possibly negitive sum
     logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
     // logic [`NE+1:0]     SumExpTmp;  // exponent of the normalized sum not taking into account denormal or zero results
@@ -229,11 +257,11 @@ module fma2(
     logic [`NE+1:0]     FullResultExp;      // ResultExp with bits to determine sign and overflow
     logic [`NF+2:0]     NormSum;    // normalized sum
     // logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
-    logic [8:0]         NormCnt, NormCntCheck;    // output of the leading zero detector //***change this later
+    // logic [8:0]         NormCnt;    // output of the leading zero detector //***change this later
     logic               NormSumSticky; // sticky bit calulated from the normalized sum
     logic               SumZero;    // is the sum zero
-    logic               NegSum;     // is the sum negitive
-    logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
+    // logic               NegSum;     // is the sum negitive
+    // logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
     logic               ResultDenorm;   // is the result denormalized
     logic               Sticky, UfSticky;     // Sticky bit
     logic               Plus1, Minus1, CalcPlus1, CalcMinus1;   // do you add or subtract one for rounding
@@ -251,15 +279,9 @@ module fma2(
     logic           SigNaN;     // is an input a signaling NaN
     logic           UnderflowFlag;  // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
     logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-    logic           ZSgnEffM;
+    //logic           ZSgnEffM;
    
     
-    // Calculate the product's sign
-    //      Negate product's sign if FNMADD or FNMSUB
- 
-    assign PSgn = XSgnM ^ YSgnM ^ (FOpCtrlM[1]&~FOpCtrlM[2]);
-    assign ZSgnEffM = ZSgnM^FOpCtrlM[0]; // Swap sign of Z for subtract
-
 
 
             // ///////////////////////////////////////////////////////////////////////////////
@@ -287,30 +309,6 @@ module fma2(
             // // If the sum is negitive, negate the sum.
             // assign Sum = NegSum ? -PreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
 
-    fmaadd fmaadd(.AlignedAddendM, .ProdManM, .PSgn, .ZSgnEffM, .KillProdM, .Sum, .NegSum, .InvZ, .NormCnt);
-
-
-
-
-            // ///////////////////////////////////////////////////////////////////////////////
-            // // Leading zero counter
-            // ///////////////////////////////////////////////////////////////////////////////
-
-            // //*** replace with non-behavoral code
-            // logic [8:0] i;
-            // always_comb begin
-            //         i = 0;
-            //         while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-            //         NormCnt = i+1;    // compute shift count
-            // end
-
-    fmalzc fmalzc(.Sum, .NormCntCheck);
-
-
-
-
-
-
 
 
 
@@ -347,7 +345,7 @@ module fma2(
             // assign SumExp = SumZero ? 0 : //***again fix mux
             //                 ResultDenorm ? 0 :
             //                 SumExpTmp;
-    normalize normalize(.Sum, .ZExpM, .ProdExpM, .NormCnt, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
+    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
             .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
 
 
@@ -442,7 +440,7 @@ module fma2(
                 // assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
                 // assign ResultExp = FullResultExp[`NE-1:0];
 
-    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZ, .ResultSgn, .SumExp,
+    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
         .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
 
 
@@ -456,13 +454,13 @@ module fma2(
     // Determine the sign if the sum is zero
     //      if cancelation then 0 unless round to -infinity
     //      otherwise psign
-    assign ZeroSgn = (PSgn^ZSgnEffM)&~Underflow ? FrmM == 3'b010 : PSgn;
+    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow ? FrmM[1:0] == 2'b10 : PSgnM;
 
     // is the result negitive
     //  if p - z is the Sum negitive
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
-    assign ResultSgnTmp = InvZ&(ZSgnEffM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnEffM)&PSgn);
+    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
     assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
  
 
@@ -503,7 +501,7 @@ module fma2(
         // assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
 
         fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
-    .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgn, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
+    .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
     .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
 
 
@@ -526,11 +524,11 @@ module fma2(
                         YNaNM ? YNaNResult :
                         ZNaNM ? ZNaNResult :
                         Invalid ? InvalidResult : // has to be before inf
-                        XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn,  YExpM[7:0], YManM[51:29]} :
+                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
+                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
                         ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
                         KillProdM ? KillProdResult :  
-			Overflow ? OverflowResult :
+			            Overflow ? OverflowResult :
                         Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
                         FmtM ? {ResultSgn, ResultExp, ResultFrac} :
                                {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
@@ -628,18 +626,20 @@ module alignshift(
 endmodule
 
 module fmaadd(
-    input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
-    input logic [2*`NF+1:0] ProdManM,
-    input logic PSgn, ZSgnEffM,
-    input logic KillProdM,
-    output logic [3*`NF+5:0]   Sum,
-    output logic NegSum,
-    output logic InvZ,
-    output logic [8:0] NormCnt
+    input logic     [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition
+    input logic [2*`NF+1:0] ProdManE,
+    input logic PSgnE, ZSgnEffE,
+    input logic KillProdE,
+    input logic XZeroE, YZeroE,
+    output logic [3*`NF+5:0]   SumE,
+    output logic NegSumE,
+    output logic InvZE,
+    output logic [8:0] NormCntE
 );
     logic [3*`NF+6:0]   PreSum, NegPreSum;     // possibly negitive sum
     logic [2*`NF+1:0]   ProdMan2;   // product being added
     logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
+    logic [3*`NF+6:0]   NegProdMan2;
     logic [8:0] PNormCnt, NNormCnt;
 
     ///////////////////////////////////////////////////////////////////////////////
@@ -649,47 +649,48 @@ module fmaadd(
     // Negate Z  when doing one of the following opperations:
     //      -prod +  Z
     //       prod -  Z
-    assign InvZ = ZSgnEffM ^ PSgn;
+    assign InvZE = ZSgnEffE ^ PSgnE;
 
     // Choose an inverted or non-inverted addend - the one is added later
-    assign AlignedAddend2 = InvZ ? -{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
-    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
-    assign ProdMan2 = KillProdM ? 0 : ProdManM;
+    assign AlignedAddend2 = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
+    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)    
+    assign ProdMan2 = ProdManE&{2*`NF+2{~KillProdE}};
+    assign NegProdMan2 = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdMan2, 2'b0};
     poslza poslza(AlignedAddend2, ProdMan2, PNormCnt);
-    neglza neglza({1'b0,AlignedAddendM}, -{{`NF+3{1'b0}}, ProdMan2, 2'b0}, NNormCnt);
+    neglza neglza({1'b0,AlignedAddendE}, NegProdMan2, NNormCnt);
     // Do the addition
     //      - add one to negate if the added was inverted
     //      - the 2 extra bits at the begining and end are needed for rounding
     assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0};
-    assign NegPreSum = AlignedAddendM - {ProdMan2, 2'b0};
+    assign NegPreSum = AlignedAddendE + NegProdMan2;
      
     // Is the sum negitive
-    assign NegSum = PreSum[3*`NF+6];
+    assign NegSumE = PreSum[3*`NF+6];
     // If the sum is negitive, negate the sum.
-    assign Sum = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-    assign NormCnt = NegSum ? NNormCnt : PNormCnt;
+    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
 // set to PNormCnt if the product is zero (there may be an additional bit of error from the negation)
 
 endmodule
 
-module fmalzc(
-    input logic [3*`NF+5:0]   Sum,
-    output logic [8:0] NormCntCheck
-);
+// module fmalzc(
+//     input logic [3*`NF+5:0]   Sum,
+//     output logic [8:0] NormCntCheck
+// );
 
-    ///////////////////////////////////////////////////////////////////////////////
-    // Leading one detector
-    ///////////////////////////////////////////////////////////////////////////////
+//     ///////////////////////////////////////////////////////////////////////////////
+//     // Leading one detector
+//     ///////////////////////////////////////////////////////////////////////////////
 
-    //*** replace with non-behavoral code
-    logic [8:0] i;
-    always_comb begin
-            i = 0;
-            while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-            NormCntCheck = i;
-    end
+//     //*** replace with non-behavoral code
+//     logic [8:0] i;
+//     always_comb begin
+//             i = 0;
+//             while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
+//             NormCntCheck = i;
+//     end
 
-endmodule
+// endmodule
 ////////////////////////////////////////////////////////////////////////////////////
 //	Filename: 	lza.v
 //	Author:		Katherine Parry
@@ -782,10 +783,10 @@ endmodule
 
 
 module normalize(
-    input logic [3*`NF+5:0]   Sum,
+    input logic [3*`NF+5:0]   SumM,
     input logic [`NE-1:0] ZExpM,
     input logic     [`NE+1:0]      ProdExpM,       // X exponent + Y exponent - bias
-    input logic [8:0] NormCnt,
+    input logic [8:0] NormCntM,
     input logic                 FmtM,       // precision 1 = double 0 = single
     input logic KillProdM,
     input logic AddendStickyM,
@@ -810,14 +811,14 @@ module normalize(
     ///////////////////////////////////////////////////////////////////////////////
 
     // Determine if the sum is zero
-    assign SumZero = ~(|Sum);
+    assign SumZero = ~(|SumM);
 
     // determine the length of the fraction based on precision
     assign FracLen = FmtM ? `NF+1 : 13'd24;
     //assign FracLen = `NF;
 
     // Determine if the result is denormal
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} + 1 - (`NF+4));
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
     assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
 
     assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
@@ -826,19 +827,17 @@ module normalize(
     //  - if not denorm add 1 to shift out the leading 1
     assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
     // Normalize the sum
-    assign SumShiftedTmp = SumZero ? 0 : {2'b0, Sum} << NormCnt+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShiftedTmp = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
     // LZA correction
     assign LZAPlus1 = SumShiftedTmp[3*`NF+7];
     assign SumShifted =  LZAPlus1 ? SumShiftedTmp[3*`NF+6:1] : SumShiftedTmp[3*`NF+5:0];
     assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
     // Calculate the sticky bit
-    assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+2:0]) : (|SumShifted[136:0]);
+    assign NormSumSticky = (|SumShifted[2*`NF+2:0]) | (|SumShifted[136:2*`NF+3]&~FmtM);
     assign UfSticky = AddendStickyM | NormSumSticky;
 
     // Determine sum's exponent
-    assign SumExp = SumZero ? 0 : //***again fix mux
-                 ResultDenorm ? 0 :
-                 SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6]);
+    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
 // recalculate if the result is denormalized
 assign ResultDenorm = PreResultDenorm&~SumShiftedTmp[3*`NF+6]&~SumShiftedTmp[3*`NF+7];
                  
@@ -882,7 +881,7 @@ module fmaround(
     input logic AddendStickyM,
     input logic NormSumSticky,
     input logic ZZeroM,
-    input logic InvZ,
+    input logic InvZM,
     input logic [`NE+1:0]     SumExp,     // exponent of the normalized sum
     input logic ResultSgn,
     output logic CalcPlus1, Plus1, UfPlus1, Minus1,
@@ -941,8 +940,8 @@ module fmaround(
     // determine sticky
     assign Sticky = UfSticky | NormSum[0];
     // Deterimine if a small number was supposed to be subtrated
-    assign SubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
-    assign UfSubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky) & ~ZZeroM; //***here
+    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
+    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
 
     always_comb begin
         // Determine if you add 1
@@ -983,7 +982,7 @@ module fmaround(
     // Compute rounded result
     assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
                              Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
-    assign NormSumTruncated = FmtM ? NormSum[`NF+2:3] : {NormSum[54:32], 29'b0};
+    assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}};
 
     assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
     assign ResultExp = FullResultExp[`NE-1:0];
@@ -998,7 +997,7 @@ module fmaflags(
     input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic [`NE+1:0]     FullResultExp,      // ResultExp with bits to determine sign and overflow
     input logic [`NE+1:0]     SumExp,     // exponent of the normalized sum
-    input logic ZSgnEffM, PSgn,
+    input logic ZSgnEffM, PSgnM,
     input logic Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1,
     input logic                 FmtM,       // precision 1 = double 0 = single
     output logic Invalid, Overflow, Underflow,
@@ -1021,7 +1020,7 @@ module fmaflags(
 
     // assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
     assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
-    assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
+    assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
    
     // Set Overflow flag if the number is too big to be represented
     //      - Don't set the overflow flag if an overflowed result isn't outputed
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 94b695b0c..14545f345 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -124,8 +124,10 @@ module fpu (
 	logic [63:0] 	SgnResE, SgnResM; // sign injection result
 	logic 		    SgnNVE, SgnNVM;   // sign injection invalid flag (Not Valid)
 
-	logic [63:0] 	FResM, FResW;     // selected result that is ready in the memory stage
-	logic [4:0] 	FFlgM;            // selected flag that is ready in the memory stage
+	logic [63:0] 	FResE, FResM, FResW;     // selected result that is ready in the memory stage
+	logic [4:0] 	FFlgE, FFlgM;            // selected flag that is ready in the memory stage
+
+	logic [`XLEN-1:0]  FIntResE;
 
 	logic [63:0] 	   FPUResultW;    // final FP result being written to the FP register
 		
@@ -133,7 +135,7 @@ module fpu (
 	logic 		    FDivSqrtDoneE;          // is divide done
 	logic [63:0] 	DivInput1E, DivInput2E; // inputs to divide/squareroot unit
 	logic 		    FDivClk;                // clock for divide/squareroot unit
-	logic [63:0] 	AlignedSrcAM;           // align SrcA to the floating point format
+	logic [63:0] 	AlignedSrcAE;           // align SrcA to the floating point format
 
 
 
@@ -305,7 +307,16 @@ module fpu (
 	assign FWriteDataE = FSrcYE[`XLEN-1:0];
 	
 
+	// Align SrcA to MSB when single precicion
+	mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
 
+  // select a result that may be written to the FP register
+	mux4  #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
+	mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, FResSelE, FFlgE);
+	
+  // select the result that may be written to the integer register - to IEU
+	mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
+	
 
 
   //***will synth remove registers of values that are always zero?
@@ -313,7 +324,7 @@ module fpu (
 	// E/M pipe registers
 	////////////////////////////////////////////////////////////////////////////////////////
 
-	flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
+	// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
 	flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
 	flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
 	flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
@@ -321,23 +332,23 @@ module fpu (
 				{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
 				{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
 	
-	flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); 
-	flopenrc #(1)  EMRegCmpFlg(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); 
+	flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); 
+	flopenrc #(5)  EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); 
 	
-	flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
-	flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
+	flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
+	// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
 
 	flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
 	flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
 	
-	flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
-	flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
+	// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
+	// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
   
-	flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
+	// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
 	
-	flopenrc #(18) EMCtrlReg(clk, reset, FlushM, ~StallM,
-				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
-				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
+	flopenrc #(14) EMCtrlReg(clk, reset, FlushM, ~StallM,
+				 {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
+				 {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
 	
 	
 
@@ -348,16 +359,7 @@ module fpu (
 	//BEGIN MEMORY STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
 
-	// Align SrcA to MSB when single precicion
-	mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
 
-  // select a result that may be written to the FP register
-	mux4  #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
-	mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
-	
-  // select the result that may be written to the integer register - to IEU
-	mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
-	
   // FPU flag selection - to privileged
 	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);