Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2021-08-13 17:23:04 -05:00 · 2021-08-13 17:23:04 -05:00 · 766c829d31
commit 766c829d31
parent 55fda4de62 aedd71d570
6 changed files with 151 additions and 141 deletions
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -130,10 +130,21 @@ assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] && |FMAResM[`NF-1:0] : &FMAResM[30:23
 // assign	ZNaNE = FmtE ? &Z[62:52] && |Z[51:0] : &Z[62:55] && |Z[54:32]; 
 assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0]; 
 // instantiate device under test
-fma1 UUT1(.XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}), .*);
+
    logic [3*`NF+5:0]	SumE, SumM;       
    logic 			    InvZE, InvZM;
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
    logic [8:0]			NormCntE, NormCntM;
    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
 fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
-               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .AlignedAddendM(AlignedAddendE), .ProdManM(ProdManE),
+               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace
--- a/wally-pipelined/src/fpu/cvtfp.sv
+++ b/wally-pipelined/src/fpu/cvtfp.sv
@ -41,12 +41,10 @@ module cvtfp (
    logic [12:0] ShiftCnt;
 	logic [51:0] SFrac;
 	logic [25:0] DFrac;
-	logic [77:0] DFracTmp,tmp, tmp2;
+	logic [77:0] DFracTmp;
    //assign ShiftCnt = FmtE ? -DExpCalc&{13{Denorm}} : NormCnt;
    assign SFrac = XManE[51:0] << NormCnt;
 logic Shift;
 assign tmp = (-DExpCalc+1)&{13{Shift}};
 assign tmp2 = {XManE, 23'b0};
 assign Shift = {13{Denorm|(($signed(DExpCalc) > $signed(-25)) & DExpCalc[12])}};
 	assign DFracTmp = {XManE, 25'b0} >> ((-DExpCalc+1)&{13{Shift}});
 assign DFrac = DFracTmp[76:51];
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -58,28 +58,33 @@ module fma(
      //	{?, is mul, negate product, negate addend}
    // signals transferred between pipeline stages
-    logic [2*`NF+1:0]	ProdManE, ProdManM; 
+    // logic [2*`NF+1:0]	ProdManE, ProdManM; 
-    logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;                       
+    // logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;
    logic [3*`NF+5:0]	SumE, SumM;                       
    logic [`NE+1:0]	    ProdExpE, ProdExpM;
    logic 			    AddendStickyE, AddendStickyM;
    logic 			    KillProdE, KillProdM;
    logic 			    InvZE, InvZM;
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
    logic [8:0]			NormCntE, NormCntM;
-    fma1 fma1 (.XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
+    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
-                .FOpCtrlE, .FmtE, .ProdManE, .AlignedAddendE,
+                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
    // E/M pipeline registers
-    flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
+    // flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
-    flopenrc #(162) EMRegFma2(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM); 
+    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #(2) EMRegFma4(clk, reset, FlushM, ~StallM, 
+    flopenrc #(15) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                            {AddendStickyE, KillProdE},
+                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE},
-                            {AddendStickyM, KillProdM});
+                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM});
    fma2 fma2(.XSgnM, .YSgnM, .ZSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-            .FOpCtrlM, .FrmM, .FmtM, 
+            .FOpCtrlM, .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
            .ProdManM, .AlignedAddendM, .ProdExpM, .AddendStickyM, .KillProdM, 
            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
            .FMAResM, .FMAFlgM);
@ -88,7 +93,7 @@ endmodule
 module fma1(
-    // input logic        XSgnE, YSgnE, ZSgnE,
+    input logic        XSgnE, YSgnE, ZSgnE,
    input logic [`NE-1:0] XExpE, YExpE, ZExpE,      // biased exponents in B(NE.0) format
    input logic [`NF:0] XManE, YManE, ZManE,   // fractions in U(0.NF) format]
    input logic        XDenormE, YDenormE, ZDenormE, // is the input denormal
@ -96,15 +101,24 @@ module fma1(
    input logic [`NE-1:0] BiasE,
    input logic     [2:0]       FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtE,       // precision 1 = double 0 = single
-    output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
+    // output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
-    output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
+    // output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
    output logic    [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
-    output logic                KillProdE      // set the product to zero before addition if the product is too small to matter
+    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
    output logic [3*`NF+5:0]   SumE,
    output logic NegSumE,
    output logic InvZE,
    output logic ZSgnEffE,
    output logic PSgnE,
    output logic [8:0] NormCntE
    );
    logic [`NE-1:0] Denorm;
    logic [`NE-1:0] DenormXExp, DenormYExp;             // Denormalized input value
    logic    [2*`NF+1:0]     ProdManE;   // 1.X frac * 1.Y frac in U(2.2Nf) format
    logic    [3*`NF+5:0]     AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1)
    ///////////////////////////////////////////////////////////////////////////////
    // Calculate the product
    //      - When multiplying two fp numbers, add the exponents
@ -117,8 +131,7 @@ module fma1(
    assign Denorm = FmtE ? 1 : 897;
    assign DenormXExp = XDenormE ? Denorm : XExpE;
    assign DenormYExp = YDenormE ? Denorm : YExpE;
-    assign ProdExpE = (XZeroE|YZeroE) ? 0 :
+    assign ProdExpE = (DenormXExp + DenormYExp - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
                 DenormXExp + DenormYExp - BiasE;
    // Calculate the product's mantissa
    //      - Mantissa includes the assumed one. If the number is denormalized or zero, it does not have an assumed one.
@ -189,6 +202,15 @@ module fma1(
    alignshift alignshift(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
                        .AlignedAddendE, .AddendStickyE, .KillProdE);
    // Calculate the product's sign
    //      Negate product's sign if FNMADD or FNMSUB
    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
    assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; // Swap sign of Z for subtract
    fmaadd fmaadd(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .SumE, .NegSumE, .InvZE, .NormCntE, .XZeroE, .YZeroE);
 endmodule
@ -201,8 +223,8 @@ module fma2(
    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic     [2:0]       FOpCtrlM,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
+    // input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
-    input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
+    // input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
    input logic     [`NE+1:0]      ProdExpM,       // X exponent + Y exponent - bias
    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
@ -210,6 +232,12 @@ module fma2(
    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                 XSNaNM, YSNaNM, ZSNaNM,    // inputs are signaling NaNs
    input logic [3*`NF+5:0]   SumM,
    input logic NegSumM,
    input logic InvZM,
    input logic ZSgnEffM,
    input logic PSgnM,
    input logic [8:0] NormCntM,
    output logic    [`FLEN-1:0]      FMAResM,     // FMA final result
    output logic    [4:0]       FMAFlgM);     // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -218,10 +246,10 @@ module fma2(
    logic [`NF-1:0]     ResultFrac; // Result fraction
    logic [`NE-1:0]     ResultExp;  // Result exponent
    logic               ResultSgn;  // Result sign
-    logic               PSgn;       // product sign
+    // logic               PSgn;       // product sign
    // logic [2*`NF+1:0]   ProdMan2;   // product being added
    // logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
-    logic [3*`NF+5:0]   Sum;        // positive sum
+    // logic [3*`NF+5:0]   Sum;        // positive sum
    // logic [3*`NF+6:0]   PreSum;     // possibly negitive sum
    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
    // logic [`NE+1:0]     SumExpTmp;  // exponent of the normalized sum not taking into account denormal or zero results
@ -229,11 +257,11 @@ module fma2(
    logic [`NE+1:0]     FullResultExp;      // ResultExp with bits to determine sign and overflow
    logic [`NF+2:0]     NormSum;    // normalized sum
    // logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
-    logic [8:0]         NormCnt, NormCntCheck;    // output of the leading zero detector //***change this later
+    // logic [8:0]         NormCnt;    // output of the leading zero detector //***change this later
    logic               NormSumSticky; // sticky bit calulated from the normalized sum
    logic               SumZero;    // is the sum zero
-    logic               NegSum;     // is the sum negitive
+    // logic               NegSum;     // is the sum negitive
-    logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
+    // logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
    logic               ResultDenorm;   // is the result denormalized
    logic               Sticky, UfSticky;     // Sticky bit
    logic               Plus1, Minus1, CalcPlus1, CalcMinus1;   // do you add or subtract one for rounding
@ -251,15 +279,9 @@ module fma2(
    logic           SigNaN;     // is an input a signaling NaN
    logic           UnderflowFlag;  // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
    logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-    logic           ZSgnEffM;
+    //logic           ZSgnEffM;
    // Calculate the product's sign
    //      Negate product's sign if FNMADD or FNMSUB
    assign PSgn = XSgnM ^ YSgnM ^ (FOpCtrlM[1]&~FOpCtrlM[2]);
    assign ZSgnEffM = ZSgnM^FOpCtrlM[0]; // Swap sign of Z for subtract
            // ///////////////////////////////////////////////////////////////////////////////
@ -287,30 +309,6 @@ module fma2(
            // // If the sum is negitive, negate the sum.
            // assign Sum = NegSum ? -PreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
    fmaadd fmaadd(.AlignedAddendM, .ProdManM, .PSgn, .ZSgnEffM, .KillProdM, .Sum, .NegSum, .InvZ, .NormCnt);
            // ///////////////////////////////////////////////////////////////////////////////
            // // Leading zero counter
            // ///////////////////////////////////////////////////////////////////////////////
            // //*** replace with non-behavoral code
            // logic [8:0] i;
            // always_comb begin
            //         i = 0;
            //         while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
            //         NormCnt = i+1;    // compute shift count
            // end
    fmalzc fmalzc(.Sum, .NormCntCheck);
@ -347,7 +345,7 @@ module fma2(
            // assign SumExp = SumZero ? 0 : //***again fix mux
            //                 ResultDenorm ? 0 :
            //                 SumExpTmp;
-    normalize normalize(.Sum, .ZExpM, .ProdExpM, .NormCnt, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
+    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
@ -442,7 +440,7 @@ module fma2(
                // assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
                // assign ResultExp = FullResultExp[`NE-1:0];
-    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZ, .ResultSgn, .SumExp,
+    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
@ -456,13 +454,13 @@ module fma2(
    // Determine the sign if the sum is zero
    //      if cancelation then 0 unless round to -infinity
    //      otherwise psign
-    assign ZeroSgn = (PSgn^ZSgnEffM)&~Underflow ? FrmM == 3'b010 : PSgn;
+    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow ? FrmM[1:0] == 2'b10 : PSgnM;
    // is the result negative
    //  if p - z, the result is negative when the Sum is negative
    //  if -p + z, the result is negative when the Sum is positive
    //  if -p - z, the result is always negative
-    assign ResultSgnTmp = InvZ&(ZSgnEffM)&NegSum | InvZ&PSgn&~NegSum | ((ZSgnEffM)&PSgn);
+    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
    assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
@ -503,7 +501,7 @@ module fma2(
        // assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
        fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
-    .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgn, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
+    .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
    .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
@ -526,8 +524,8 @@ module fma2(
                        YNaNM ? YNaNResult :
                        ZNaNM ? ZNaNResult :
                        Invalid ? InvalidResult : // has to be before inf
-                        XInfM ? FmtM ? {PSgn, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgn,  XExpM[7:0], XManM[51:29]} : 
+                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgn, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgn,  YExpM[7:0], YManM[51:29]} :
+                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
                        KillProdM ? KillProdResult :  
 			            Overflow ? OverflowResult :
@ -628,18 +626,20 @@ module alignshift(
 endmodule
 module fmaadd(
-    input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
+    input logic     [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition
-    input logic [2*`NF+1:0] ProdManM,
+    input logic [2*`NF+1:0] ProdManE,
-    input logic PSgn, ZSgnEffM,
+    input logic PSgnE, ZSgnEffE,
-    input logic KillProdM,
+    input logic KillProdE,
-    output logic [3*`NF+5:0]   Sum,
+    input logic XZeroE, YZeroE,
-    output logic NegSum,
+    output logic [3*`NF+5:0]   SumE,
-    output logic InvZ,
+    output logic NegSumE,
-    output logic [8:0] NormCnt
+    output logic InvZE,
    output logic [8:0] NormCntE
 );
    logic [3*`NF+6:0]   PreSum, NegPreSum;     // possibly negitive sum
    logic [2*`NF+1:0]   ProdMan2;   // product being added
    logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
    logic [3*`NF+6:0]   NegProdMan2;
    logic [8:0] PNormCnt, NNormCnt;
    ///////////////////////////////////////////////////////////////////////////////
@ -649,47 +649,48 @@ module fmaadd(
    // Negate Z when doing one of the following operations:
    //      -prod +  Z
    //       prod -  Z
-    assign InvZ = ZSgnEffM ^ PSgn;
+    assign InvZE = ZSgnEffE ^ PSgnE;
    // Choose an inverted or non-inverted addend - the one is added later
-    assign AlignedAddend2 = InvZ ? -{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
+    assign AlignedAddend2 = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
    // Kill the product if the product is too small to affect the addition (determined in fma1.sv)
-    assign ProdMan2 = KillProdM ? 0 : ProdManM;
+    assign ProdMan2 = ProdManE&{2*`NF+2{~KillProdE}};
    assign NegProdMan2 = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdMan2, 2'b0};
    poslza poslza(AlignedAddend2, ProdMan2, PNormCnt);
-    neglza neglza({1'b0,AlignedAddendM}, -{{`NF+3{1'b0}}, ProdMan2, 2'b0}, NNormCnt);
+    neglza neglza({1'b0,AlignedAddendE}, NegProdMan2, NNormCnt);
    // Do the addition
    //      - add one to negate if the addend was inverted
    //      - the 2 extra bits at the beginning and end are needed for rounding
    assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0};
-    assign NegPreSum = AlignedAddendM - {ProdMan2, 2'b0};
+    assign NegPreSum = AlignedAddendE + NegProdMan2;
    // Is the sum negative
-    assign NegSum = PreSum[3*`NF+6];
+    assign NegSumE = PreSum[3*`NF+6];
    // If the sum is negative, negate the sum.
-    assign Sum = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-    assign NormCnt = NegSum ? NNormCnt : PNormCnt;
+    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
 // set to PNormCnt if the product is zero (there may be an additional bit of error from the negation)
 endmodule
-module fmalzc(
+// module fmalzc(
-    input logic [3*`NF+5:0]   Sum,
+//     input logic [3*`NF+5:0]   Sum,
-    output logic [8:0] NormCntCheck
+//     output logic [8:0] NormCntCheck
-);
+// );
-    ///////////////////////////////////////////////////////////////////////////////
+//     ///////////////////////////////////////////////////////////////////////////////
-    // Leading one detector
+//     // Leading one detector
-    ///////////////////////////////////////////////////////////////////////////////
+//     ///////////////////////////////////////////////////////////////////////////////
-    //*** replace with non-behavoral code
+//     //*** replace with non-behavoral code
-    logic [8:0] i;
+//     logic [8:0] i;
-    always_comb begin
+//     always_comb begin
-            i = 0;
+//             i = 0;
-            while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
+//             while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-            NormCntCheck = i;
+//             NormCntCheck = i;
-    end
+//     end
-endmodule
+// endmodule
 ////////////////////////////////////////////////////////////////////////////////////
 //	Filename: 	lza.v
 //	Author:		Katherine Parry
@ -782,10 +783,10 @@ endmodule
 module normalize(
-    input logic [3*`NF+5:0]   Sum,
+    input logic [3*`NF+5:0]   SumM,
    input logic [`NE-1:0] ZExpM,
    input logic     [`NE+1:0]      ProdExpM,       // X exponent + Y exponent - bias
-    input logic [8:0] NormCnt,
+    input logic [8:0] NormCntM,
    input logic                 FmtM,       // precision 1 = double 0 = single
    input logic KillProdM,
    input logic AddendStickyM,
@ -810,14 +811,14 @@ module normalize(
    ///////////////////////////////////////////////////////////////////////////////
    // Determine if the sum is zero
-    assign SumZero = ~(|Sum);
+    assign SumZero = ~(|SumM);
    // determine the length of the fraction based on precision
    assign FracLen = FmtM ? `NF+1 : 13'd24;
    //assign FracLen = `NF;
    // Determine if the result is denormal
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} + 1 - (`NF+4));
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
    assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
@ -826,19 +827,17 @@ module normalize(
    //  - if not denorm add 1 to shift out the leading 1
    assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
    // Normalize the sum
-    assign SumShiftedTmp = SumZero ? 0 : {2'b0, Sum} << NormCnt+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShiftedTmp = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
    // LZA correction
    assign LZAPlus1 = SumShiftedTmp[3*`NF+7];
    assign SumShifted =  LZAPlus1 ? SumShiftedTmp[3*`NF+6:1] : SumShiftedTmp[3*`NF+5:0];
    assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
    // Calculate the sticky bit
-    assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+2:0]) : (|SumShifted[136:0]);
+    assign NormSumSticky = (|SumShifted[2*`NF+2:0]) | (|SumShifted[136:2*`NF+3]&~FmtM);
    assign UfSticky = AddendStickyM | NormSumSticky;
    // Determine sum's exponent
-    assign SumExp = SumZero ? 0 : //***again fix mux
+    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
                 ResultDenorm ? 0 :
                 SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6]);
 // recalculate if the result is denormalized
 assign ResultDenorm = PreResultDenorm&~SumShiftedTmp[3*`NF+6]&~SumShiftedTmp[3*`NF+7];
@ -882,7 +881,7 @@ module fmaround(
    input logic AddendStickyM,
    input logic NormSumSticky,
    input logic ZZeroM,
-    input logic InvZ,
+    input logic InvZM,
    input logic [`NE+1:0]     SumExp,     // exponent of the normalized sum
    input logic ResultSgn,
    output logic CalcPlus1, Plus1, UfPlus1, Minus1,
@ -941,8 +940,8 @@ module fmaround(
    // determine sticky
    assign Sticky = UfSticky | NormSum[0];
    // Determine if a small number was supposed to be subtracted
-    assign SubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
+    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
-    assign UfSubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky) & ~ZZeroM; //***here
+    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
    always_comb begin
        // Determine if you add 1
@ -983,7 +982,7 @@ module fmaround(
    // Compute rounded result
    assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
                             Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
-    assign NormSumTruncated = FmtM ? NormSum[`NF+2:3] : {NormSum[54:32], 29'b0};
+    assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}};
    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
    assign ResultExp = FullResultExp[`NE-1:0];
@ -998,7 +997,7 @@ module fmaflags(
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic [`NE+1:0]     FullResultExp,      // ResultExp with bits to determine sign and overflow
    input logic [`NE+1:0]     SumExp,     // exponent of the normalized sum
-    input logic ZSgnEffM, PSgn,
+    input logic ZSgnEffM, PSgnM,
    input logic Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1,
    input logic                 FmtM,       // precision 1 = double 0 = single
    output logic Invalid, Overflow, Underflow,
@ -1021,7 +1020,7 @@ module fmaflags(
    // assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
    assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
-    assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
+    assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
    // Set Overflow flag if the number is too big to be represented
    //      - Don't set the overflow flag if an overflowed result isn't output
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -124,8 +124,10 @@ module fpu (
 	logic [63:0] 	SgnResE, SgnResM; // sign injection result
 	logic 		    SgnNVE, SgnNVM;   // sign injection invalid flag (Not Valid)
-	logic [63:0] 	FResM, FResW;     // selected result that is ready in the memory stage
+	logic [63:0] 	FResE, FResM, FResW;     // selected result that is ready in the memory stage
-	logic [4:0] 	FFlgM;            // selected flag that is ready in the memory stage
+	logic [4:0] 	FFlgE, FFlgM;            // selected flag that is ready in the memory stage
 	logic [`XLEN-1:0]  FIntResE;
 	logic [63:0] 	   FPUResultW;    // final FP result being written to the FP register
@ -133,7 +135,7 @@ module fpu (
 	logic 		    FDivSqrtDoneE;          // is divide done
 	logic [63:0] 	DivInput1E, DivInput2E; // inputs to divide/squareroot unit
 	logic 		    FDivClk;                // clock for divide/squareroot unit
-	logic [63:0] 	AlignedSrcAM;           // align SrcA to the floating point format
+	logic [63:0] 	AlignedSrcAE;           // align SrcA to the floating point format
@ -305,6 +307,15 @@ module fpu (
 	assign FWriteDataE = FSrcYE[`XLEN-1:0];
 	// Align SrcA to MSB when single precision
 	mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
  // select a result that may be written to the FP register
 	mux4  #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
 	mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, FResSelE, FFlgE);
  // select the result that may be written to the integer register - to IEU
 	mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
@ -313,7 +324,7 @@ module fpu (
 	// E/M pipe registers
 	////////////////////////////////////////////////////////////////////////////////////////
-	flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
+	// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
 	flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
 	flopenrc #(65) EMFpReg3(clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM});
 	flopenrc #(65) EMFpReg4(clk, reset, FlushM, ~StallM, {ZSgnE,ZExpE,ZManE}, {ZSgnM,ZExpM,ZManM});
@ -321,23 +332,23 @@ module fpu (
 				{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
 				{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
-	flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, CmpResE, CmpResM); 
+	flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); 
-	flopenrc #(1)  EMRegCmpFlg(clk, reset, FlushM, ~StallM, CmpNVE, CmpNVM); 
+	flopenrc #(5)  EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM); 
-	flopenrc #(64) EMRegSgnRes(clk, reset, FlushM, ~StallM, SgnResE, SgnResM);
+	flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
-	flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
+	// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
 	flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
 	flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
-	flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
+	// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
-	flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
+	// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
-	flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
+	// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
-	flopenrc #(18) EMCtrlReg(clk, reset, FlushM, ~StallM,
+	flopenrc #(14) EMCtrlReg(clk, reset, FlushM, ~StallM,
-				 {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
+				 {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, XNormE, YNormE},
-				 {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
+				 {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM, XNormM, YNormM});
@ -348,15 +359,6 @@ module fpu (
 	//BEGIN MEMORY STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
 	// Align SrcA to MSB when single precision
 	mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM);
  // select a result that may be written to the FP register
 	mux4  #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM);
 	mux4  #(5)  FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM);
  // select the result that may be written to the integer register - to IEU
 	mux4  #(`XLEN)  IntResMux(CmpResM[`XLEN-1:0], FSrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM);
  // FPU flag selection - to privileged
 	mux5  #(5)  FPUFlgMux(5'b0, FMAFlgM, CvtFpFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
`@ -1,3 +1,3 @@`
	`testfloat_gen f32_add -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`	`testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat`
	`tr -d ' ' < testFloat > testFloatNoSpace`	`tr -d ' ' < testFloat > testFloatNoSpace`