FMA cleanup

2025-02-11 06:05:49 +00:00 · 2021-08-28 10:53:35 -04:00 · 2021-08-28 10:53:35 -04:00 · 7607adc951
commit 7607adc951
parent b5d6c4fb46
4 changed files with 234 additions and 546 deletions
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -142,7 +142,7 @@ assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[
                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
-fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
+fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -31,7 +31,7 @@ module fma(
    input logic                 FlushM,     // flush the memory stage
    input logic                 StallM,     // stall memory stage
    input logic                 FmtE, FmtM, // precision 1 = double 0 = single
-    input logic  [2:0]          FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
+    input logic  [2:0]          FOpCtrlE, // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
    input logic [`NE-1:0]       XExpE, YExpE, ZExpE,    // input exponents - execute stage
@ -45,21 +45,20 @@ module fma(
    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
    input logic                 XZeroM, YZeroM, ZZeroM,     // is zero - memory stage
    input logic                 XInfM, YInfM, ZInfM,        // is infinity
-    input logic [10:0]          BiasE,      // bias - depends on precison (max exponent/2)
+    input logic [10:0]          BiasE,      // bias (max exponent/2) ***parameterize in unpacking unit
 	output logic [`FLEN-1:0]    FMAResM,    // FMA result
 	output logic [4:0]		    FMAFlgM);   // FMA flags
-  //fma/mult	
+  //fma/mult/add	
-      //  fmadd  = ?000
+      //  fmadd  = 000
-      //  fmsub  = ?001
+      //  fmsub  = 001
-      //  fnmsub = ?010	-(a*b)+c
+      //  fnmsub = 010	-(a*b)+c
-      //  fnmadd = ?011 -(a*b)-c
+      //  fnmadd = 011  -(a*b)-c
-      //  fmul   = ?100
+      //  fmul   = 100
-      //	{?, is mul, negate product, negate addend}
+      //  fadd   = 110
      //  fsub   = 111
    // signals transfered between pipeline stages
    // logic [2*`NF+1:0]	ProdManE, ProdManM; 
    // logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;
    logic [3*`NF+5:0]	SumE, SumM;                       
    logic [`NE+1:0]	    ProdExpE, ProdExpM;
    logic 			    AddendStickyE, AddendStickyM;
@ -76,7 +75,6 @@ module fma(
                .ProdExpE, .AddendStickyE, .KillProdE); 
    // E/M pipeline registers
    // flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
    flopenrc #(15) EMRegFma4(clk, reset, FlushM, ~StallM, 
@ -84,7 +82,7 @@ module fma(
                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM});
    fma2 fma2(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-            .FOpCtrlM, .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
+            .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
            .FMAResM, .FMAFlgM);
@ -93,29 +91,27 @@ endmodule
 module fma1(
-    input logic        XSgnE, YSgnE, ZSgnE,
+    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,    // biased exponents in B(NE.0) format
-    input logic [`NF:0] XManE, YManE, ZManE,   // fractions in U(0.NF) format]
+    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
    input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic [`NE-1:0] BiasE,
+    input logic  [`NE-1:0]      BiasE,      // bias (max exponent/2)
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtE,       // precision 1 = double 0 = single
    // output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
    // output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
    output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
-    output logic [3*`NF+5:0]   SumE,
+    output logic [3*`NF+5:0]    SumE,           // the positive sum
-    output logic NegSumE,
+    output logic                NegSumE,        // was the sum negitive
-    output logic InvZE,
+    output logic                InvZE,          // intert Z
-    output logic ZSgnEffE,
+    output logic                ZSgnEffE,       // the modified Z sign
-    output logic PSgnE,
+    output logic                PSgnE,          // the product's sign
-    output logic [8:0] NormCntE
+    output logic [8:0]          NormCntE        // normalization shift cnt
    );
    logic [`NE-1:0] Denorm;
    logic [`NE-1:0] DenormXExp, DenormYExp;             // Denormalized input value
    logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
    logic [`NE-1:0]     XExpVal, YExpVal;   // Exponent value after taking into account denormals
    logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
    logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
@ -123,82 +119,24 @@ module fma1(
    // Calculate the product
    //      - When multipliying two fp numbers, add the exponents
    //      - Subtract the bias (XExp + YExp has two biases, one from each exponent)
-    //      - Denormal numbers have an an exponent value of 1, however they are
+    //      - If the product is zero then kill the exponent - this is a problem 
    //        represented with an exponent of 0. add one if there is a denormal number
    ///////////////////////////////////////////////////////////////////////////////
    // denormalized numbers have diffrent values depending on which precison it is.
    //      double - 1
    //      single - 1024-128+1 = 897
    assign Denorm = FmtE ? 1 : 897;
-    assign DenormXExp = XDenormE ? Denorm : XExpE;
+    assign XExpVal = XDenormE ? Denorm : XExpE;
-    assign DenormYExp = YDenormE ? Denorm : YExpE;
+    assign YExpVal = YDenormE ? Denorm : YExpE;
-    assign ProdExpE = (DenormXExp + DenormYExp - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
+    // take into account if the product is zero, the product's exponent does not compute properly if X or Y is zero
    assign ProdExpE = (XExpVal + YExpVal - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
-    // Calculate the product's mantissa
+    // multiplication of the mantissa's
    //      - Mantissa includes the assumed one. If the number is denormalized or zero, it does not have an assumed one.
            // assign ProdManE =  XManE * YManE;
    mult mult(.XManE, .YManE, .ProdManE);
-            // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
-            // // Alignment shifter
+    // Alignment shifter
-            // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
            // // determine the shift count for alignment
            // //      - negitive means Z is larger, so shift Z left
            // //      - positive means the product is larger, so shift Z right
            // //      - Denormal numbers have an an exponent value of 1, however they are
            // //        represented with an exponent of 0. add one to the exponent if it is a denormal number
            // assign AlignCnt = ProdExpE - (ZExpE + ({`NE-1{ZDenormE}}&Denorm));
            // // Defualt Addition without shifting
            // //          |   54'b0    |  106'b(product)  | 2'b0 |
            // //                       |1'b0| addnend |
            // // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
            // assign ZManPreShifted = {(`NF+3)'(0), ZManE, /*106*/(2*`NF+2)'(0)};
            // always_comb
            //     begin
            //     // If the product is too small to effect the sum, kill the product
            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
            //     //  | addnend |
            //     if ($signed(AlignCnt) <= $signed(-(`NF+4))) begin
            //         KillProdE = 1;
            //         ZManShifted = ZManPreShifted;//{107'b0, XManE, 54'b0};
            //         AddendStickyE = ~(XZeroE|YZeroE);
            //     // If the Addend is shifted left (negitive AlignCnt)
            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
            //     //                  | addnend |
            //     end else if($signed(AlignCnt) <= $signed(0))  begin
            //         KillProdE = 0;
            //         ZManShifted = ZManPreShifted << -AlignCnt;
            //         AddendStickyE = |(ZManShifted[`NF-1:0]);
            //     // If the Addend is shifted right (positive AlignCnt)
            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
            //     //                                  | addnend |
            //     end else if ($signed(AlignCnt)<=$signed(2*`NF+1))  begin
            //         KillProdE = 0;
            //         ZManShifted = ZManPreShifted >> AlignCnt;
            //         AddendStickyE = |(ZManShifted[`NF-1:0]);
            //     // If the addend is too small to effect the addition        
            //     //      - The addend has to shift two past the end of the addend to be considered too small
            //     //      - The 2 extra bits are needed for rounding
            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
            //     //                                                      | addnend |
            //     end else begin
            //         KillProdE = 0;
            //         ZManShifted = 0;
            //         AddendStickyE = ~ZZeroE;
            //     end
            // end
            // assign AlignedAddendE = ZManShifted[4*`NF+5:`NF];
    alignshift alignshift(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
                        .AlignedAddendE, .AddendStickyE, .KillProdE);
@ -209,22 +147,39 @@ module fma1(
    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
    assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; // Swap sign of Z for subtract
    // ///////////////////////////////////////////////////////////////////////////////
    // // Addition/LZA
    // ///////////////////////////////////////////////////////////////////////////////
    fmaadd fmaadd(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .SumE, .NegSumE, .InvZE, .NormCntE, .XZeroE, .YZeroE);
 endmodule
 module fma2(
-    input logic        XSgnM, YSgnM,
+    input logic                 XSgnM, YSgnM,        // input signs
-    input logic [`NE-1:0] XExpM, YExpM, ZExpM,
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic [`NF:0] XManM, YManM, ZManM,
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic     [2:0]       FOpCtrlM,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtM,       // precision 1 = double 0 = single
    // input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
    // input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
@ -232,12 +187,12 @@ module fma2(
    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic [3*`NF+5:0]   SumM,
+    input logic     [3*`NF+5:0] SumM,       // the positive sum
-    input logic NegSumM,
+    input logic                 NegSumM,    // was the sum negitive
-    input logic InvZM,
+    input logic                 InvZM,      // do you invert Z
-    input logic ZSgnEffM,
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic PSgnM,
+    input logic                 PSgnM,      // the product's sign
-    input logic [8:0] NormCntM,
+    input logic     [8:0]       NormCntM,   // the normalization shift count
    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -246,199 +201,45 @@ module fma2(
    logic [`NF-1:0]     ResultFrac; // Result fraction
    logic [`NE-1:0]     ResultExp;  // Result exponent
    logic               ResultSgn;  // Result sign
    // logic               PSgn;       // product sign
    // logic [2*`NF+1:0]   ProdMan2;   // product being added
    // logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
    // logic [3*`NF+5:0]   Sum;        // positive sum
    // logic [3*`NF+6:0]   PreSum;     // possibly negitive sum
    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
    // logic [`NE+1:0]     SumExpTmp;  // exponent of the normalized sum not taking into account denormal or zero results
    // logic [`NE+1:0]     SumExpTmpMinus1;    // SumExpTmp-1
    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
    logic [`NF+2:0]     NormSum;        // normalized sum
    // logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
    // logic [8:0]         NormCnt;    // output of the leading zero detector //***change this later
    logic               NormSumSticky;  // sticky bit calulated from the normalized sum
    logic               SumZero;        // is the sum zero
    // logic               NegSum;     // is the sum negitive
    // logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
    logic               ResultDenorm;   // is the result denormalized
    logic               Sticky, UfSticky;           // Sticky bit
-    logic               Plus1, Minus1, CalcPlus1, CalcMinus1;   // do you add or subtract one for rounding
+    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
-    logic               UfPlus1, UfCalcPlus1;  // do you add one (for determining underflow flag)
+    logic               UfPlus1;                    // do you add one (for determining underflow flag)
-    logic               Invalid,Underflow,Overflow,Inexact; // flags
+    logic               Invalid,Underflow,Overflow; // flags
    // logic [8:0]         DenormShift;    // right shift if the result is denormalized //***change this later
    // logic               SubBySmallNum;  // was there supposed to be a subtraction by a small number
    logic [`FLEN-1:0]    Addend;     // value to add (Z or zero)
    logic               ZeroSgn;        // the result's sign if the sum is zero
    logic               ResultSgnTmp;   // the result's sign assuming the result is not zero
-    logic           Guard, Round, LSBNormSum;   // bits needed to determine rounding
+    logic               Guard, Round;   // bits needed to determine rounding
-    logic           UfGuard, UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
+    logic               UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
    // logic [`NE+1:0]    MaxExp;     // maximum value of the exponent
    // logic [`NE+1:0]    FracLen;    // length of the fraction
    logic           SigNaN;     // is an input a signaling NaN
    logic           UnderflowFlag;  // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
    //logic           ZSgnEffM;
            // ///////////////////////////////////////////////////////////////////////////////
            // // Addition
            // ///////////////////////////////////////////////////////////////////////////////
-            // // Negate Z  when doing one of the following opperations:
+    ///////////////////////////////////////////////////////////////////////////////
-            // //      -prod +  Z
+    // Normalization
-            // //       prod -  Z
+    ///////////////////////////////////////////////////////////////////////////////
            // assign ZSgnEffM = ZSgnM^FOpCtrlM[0]; // Swap sign of Z for subtract
            // assign InvZ = ZSgnEffM ^ PSgn;
            // // Choose an inverted or non-inverted addend - the one is added later
            // assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
            // // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
            // assign ProdMan2 = KillProdM ? 0 : ProdManM;
            // // Do the addition
            // //      - add one to negate if the added was inverted
            // //      - the 2 extra bits at the begining and end are needed for rounding
            // assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0} + InvZ;
            // // Is the sum negitive
            // assign NegSum = PreSum[3*`NF+6];
            // // If the sum is negitive, negate the sum.
            // assign Sum = NegSum ? -PreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
            // ///////////////////////////////////////////////////////////////////////////////
            // // Normalization
            // ///////////////////////////////////////////////////////////////////////////////
            // // Determine if the sum is zero
            // assign SumZero = ~(|Sum);
            // // determine the length of the fraction based on precision
            // assign FracLen = FmtM ? `NF : 13'd23;
            // //assign FracLen = `NF;
            // // Determine if the result is denormal
            // logic [`NE+1:0] SumExpTmpTmp;
            // assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
            // assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
            // assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
            // // Determine the shift needed for denormal results
            // assign SumExpTmpMinus1 = SumExpTmp-1;
            // assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 0; //*** change this when changing the size of DenormShift also change to an and opperation
            // // Normalize the sum
            // assign SumShifted = SumZero ? 0 : Sum << NormCnt+DenormShift; //*** fix mux's with constants in them
            // assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
            // // Calculate the sticky bit
            // assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+3:0]) : (|SumShifted[136:0]);
            // assign Sticky = AddendStickyM | NormSumSticky;
            // // Determine sum's exponent
            // assign SumExp = SumZero ? 0 : //***again fix mux
            //                 ResultDenorm ? 0 :
            //                 SumExpTmp;
    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
-                // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
-                // // Rounding
+    // Rounding
-                // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
-                // // round to nearest even
+    // round to nearest even
-                // //      {Guard, Round, Sticky}
+    // round to zero
-                // //      0xx - do nothing
+    // round to -infinity
-                // //      100 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+    // round to infinity
-                // //          - don't add 1 if a small number was supposed to be subtracted
+    // round to nearest max magnitude
                // //      101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
                // //      110/111 - Plus1
                // //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
                // //  round to -infinity
                // //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
                // //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
                // //  round to infinity
                // //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
                // //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
                // //  round to nearest max magnitude
                // //      {Guard, Round, Sticky}
                // //      0xx - do nothing
                // //      100 - tie - Plus1
                // //          - don't add 1 if a small number was supposed to be subtracted
                // //      101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
                // //      110/111 - Plus1
                // // determine guard, round, and least significant bit of the result
                // assign Guard = FmtM ? NormSum[2] : NormSum[31];
                // assign Round = FmtM ? NormSum[1] : NormSum[30];
                // assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
                // // used to determine underflow flag
                // assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
                // assign UfRound = FmtM ? NormSum[0] : NormSum[29];
                // assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
                // // Deterimine if a small number was supposed to be subtrated
                // assign SubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky) & ~ZZeroM;
                // always_comb begin
                //     // Determine if you add 1
                //     case (FrmM)
                //         3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfRound)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfRound)&LSBNormSum&~SubBySmallNum));//round to nearest even
                //         3'b001: CalcPlus1 = 0;//round to zero
                //         3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
                //         3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
                //         3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfRound)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfRound)&~SubBySmallNum)));//round to nearest max magnitude
                //         default: CalcPlus1 = 1'bx;
                //     endcase
                //     // Determine if you add 1 (for underflow flag)
                //     case (FrmM)
                //         3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even
                //         3'b001: UfCalcPlus1 = 0;//round to zero
                //         3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down
                //         3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up
                //         3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
                //         default: UfCalcPlus1 = 1'bx;
                //     endcase
                //     // Determine if you subtract 1
                //     case (FrmM)
                //         3'b000: CalcMinus1 = 0;//round to nearest even
                //         3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
                //         3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
                //         3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
                //         3'b100: CalcMinus1 = 0;//round to nearest max magnitude
                //         default: CalcMinus1 = 1'bx;
                //     endcase
                // end
                // // If an answer is exact don't round
                // assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
                // assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound);
                // assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round);
                // // Compute rounded result
                // logic [`FLEN:0] RoundAdd; //*** move this up 
                // logic [`NF-1:0] NormSumTruncated;
                // assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
                //                          Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
                // assign NormSumTruncated = FmtM ? NormSum[`NF+2:3] : {NormSum[54:32], 29'b0};
                // assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
                // assign ResultExp = FullResultExp[`NE-1:0];
    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
@ -467,38 +268,9 @@ module fma2(
-        // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
-        // // Flags
+    // Flags
-        // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
        // // Set Invalid flag for following cases:
        // //   1) any input is a signaling NaN
        // //   2) Inf - Inf (unless x or y is NaN)
        // //   3) 0 * Inf
        // assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
        // assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
        // assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
        // // Set Overflow flag if the number is too big to be represented
        // //      - Don't set the overflow flag if an overflowed result isn't outputed
        // assign Overflow = FullResultExp >= {MaxExp} & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
        // // Set Underflow flag if the number is too small to be represented in normal numbers
        // //      - Don't set the underflow flag if the result is exact
        // assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky|UfRound)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
        // assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
        // // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
        // //      - Don't set the underflow flag if an underflowed result isn't outputed
        // assign Inexact = (Sticky|UfRound|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
        // // Combine flags
        // //      - FMA can't set the Divide by zero flag
        // //      - Don't set the underflow flag if the result was rounded up to a normal number
        // assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
@ -523,7 +295,7 @@ module fma2(
    assign FMAResM = XNaNM ? XNaNResult :
                        YNaNM ? YNaNResult :
                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult : // has to be before inf
+                        Invalid ? InvalidResult :
                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
@ -537,6 +309,11 @@ module fma2(
 endmodule
 module mult(
    input logic [`NF:0] XManE, YManE,
    output logic [2*`NF+1:0] ProdManE
@ -544,22 +321,26 @@ module mult(
    assign ProdManE = XManE * YManE;
 endmodule
 module alignshift(
    input logic  [`NE-1:0]      ZExpE,      // biased exponents in B(NE.0) format
    input logic  [`NF:0]        ZManE,      // fractions in U(0.NF) format]
    input logic                 ZDenormE,   // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic [`NE+1:0] ProdExpE,
+    input logic  [`NE+1:0]      ProdExpE,       // the product's exponent
-    input logic [`NE-1:0] Denorm,
+    input logic  [`NE-1:0]      Denorm,         // the biased value of a denormalized number
-    output logic [3*`NF+5:0] AlignedAddendE,
+    output logic [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
-    output logic AddendStickyE,
+    output logic                AddendStickyE,  // Sticky bit calculated from the aliged addend
-    output logic KillProdE
+    output logic                KillProdE       // should the product be set to zero
 );
    logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
    logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
-    logic [`NE-1:0]   DenormZExp;
+    logic [`NE-1:0]     ZExpVal;            // Exponent value after taking into account denormals
    ///////////////////////////////////////////////////////////////////////////////
    // Alignment shifter
@ -568,14 +349,13 @@ module alignshift(
    // determine the shift count for alignment
    //      - negitive means Z is larger, so shift Z left
    //      - positive means the product is larger, so shift Z right
-    //      - Denormal numbers have an an exponent value of 1, however they are
+    //      - Denormal numbers have a diffrent exponent value depending on the precision
-    //        represented with an exponent of 0. add one to the exponent if it is a denormal number
+    assign ZExpVal = ZDenormE ? Denorm : ZExpE;
-    assign DenormZExp = ZDenormE ? Denorm : ZExpE;
+    assign AlignCnt = ProdExpE - ZExpVal + (`NF+3);
    assign AlignCnt = ProdExpE - DenormZExp + (`NF+3);
    // Defualt Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
-    //                       |1'b0| addnend |
+    //          | addnend |
    // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
    assign ZManPreShifted = {ZManE,(3*`NF+5)'(0)};
@ -588,20 +368,10 @@ module alignshift(
        //  | addnend |
        if ($signed(AlignCnt) < $signed(0)) begin
            KillProdE = 1;
-            ZManShifted = ZManPreShifted;//{107'b0, XManE, 54'b0};
+            ZManShifted = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);
-        // // If the Addend is shifted left (negitive AlignCnt)
+        // If the Addend is shifted right
        // //          |   54'b0    |  106'b(product)  | 2'b0 |
        // //                  | addnend |
        // end else if($signed(AlignCnt) <= $signed(0))  begin
        //     KillProdE = 0;
        //     ZManShifted = ZManPreShifted << -AlignCnt;
        //     AddendStickyE = |(ZManShifted[`NF-1:0]);
        // If the Addend is shifted right (positive AlignCnt)
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                  | addnend |
        end else if ($signed(AlignCnt)<=$signed(3*`NF+4))  begin
@ -622,25 +392,27 @@ module alignshift(
        end
    end
    assign AlignedAddendE = ZManShifted[4*`NF+5:`NF];
 endmodule
 module fmaadd(
-    input logic     [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition
+    input logic  [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
-    input logic [2*`NF+1:0] ProdManE,
+    input logic  [2*`NF+1:0]    ProdManE,       // the product's mantissa
-    input logic PSgnE, ZSgnEffE,
+    input logic                 PSgnE, ZSgnEffE,// the product and modified Z signs
-    input logic KillProdE,
+    input logic                 KillProdE,      // should the product be set to 0
-    input logic XZeroE, YZeroE,
+    input logic                 XZeroE, YZeroE, // is the input zero
-    output logic [3*`NF+5:0]   SumE,
+    output logic [3*`NF+5:0]    SumE,           // the positive sum
-    output logic NegSumE,
+    output logic                NegSumE,        // was the sum negitive
-    output logic InvZE,
+    output logic                InvZE,          // do you invert Z
-    output logic [8:0] NormCntE
+    output logic [8:0]          NormCntE        // normalization shift count
 );
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // possibly negitive sum
    logic [2*`NF+1:0]   ProdMan2;           // product being added
    logic [3*`NF+6:0]   AlignedAddend2;     // possibly inverted aligned Z
-    logic [3*`NF+6:0]   NegProdMan2;
+    logic [3*`NF+6:0]   NegProdMan2;        // a negated ProdMan2
-    logic [8:0] PNormCnt, NNormCnt;
+    logic [8:0]         PNormCnt, NNormCnt; // results from the LZA
    ///////////////////////////////////////////////////////////////////////////////
    // Addition
@ -651,83 +423,46 @@ module fmaadd(
    //       prod -  Z
    assign InvZE = ZSgnEffE ^ PSgnE;
-    // Choose an inverted or non-inverted addend - the one is added later
+    // Choose an inverted or non-inverted addend - the one has to be added now for the LZA
    assign AlignedAddend2 = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
    assign ProdMan2 = ProdManE&{2*`NF+2{~KillProdE}};
    // Negate ProdMan for LZA and the negitive sum calculation
    assign NegProdMan2 = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdMan2, 2'b0};
    // LZAs one for the positive result and one for the negitive
    //      - the +1 from inverting causes problems for normalization
    poslza poslza(AlignedAddend2, ProdMan2, PNormCnt);
    neglza neglza({1'b0,AlignedAddendE}, NegProdMan2, NNormCnt);
    // Do the addition
-    //      - add one to negate if the added was inverted
+    //      - calculate a positive and negitive sum in parallel
    //      - the 2 extra bits at the begining and end are needed for rounding
    assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0};
    assign NegPreSum = AlignedAddendE + NegProdMan2;
    // Is the sum negitive
    assign NegSumE = PreSum[3*`NF+6];
-    // If the sum is negitive, negate the sum.
+    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
 // set to PNormCnt if the product is zero (there may be an additional bit of error from the negation)
 endmodule
 // module fmalzc(
 //     input logic [3*`NF+5:0]   Sum,
 //     output logic [8:0] NormCntCheck
 // );
 //     ///////////////////////////////////////////////////////////////////////////////
 //     // Leading one detector
 //     ///////////////////////////////////////////////////////////////////////////////
 //     //*** replace with non-behavoral code
 //     logic [8:0] i;
 //     always_comb begin
 //             i = 0;
 //             while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
 //             NormCntCheck = i;
 //     end
 // endmodule
 ////////////////////////////////////////////////////////////////////////////////////
 //	Filename: 	lza.v
 //	Author:		Katherine Parry
 //  Date:		2021/02/07
 //
 // Description:  Leading Zero Anticipator
 // This a the Kershaw Leading Zero Anticipator(LZA) using the algorithm described in
 // "Leading Zero Anticipation and Dectection - A Comparison of Methods" (2001)
 //    Schmookler and Nowka.
 // After swapping, alignment and inversion of A & B, the following functions are 
 // applied to all 'i' bits. 
 //   -- T[i] =   A[i] XOR B[i];  // Propagation that will occur
 //   -- G[i] =   A[i] AND B[i];  // The value Generated
 //   -- Z[i] = ~(A[i] OR  B[i]): // Fill functions
 // The leading Zero is determined by the first occurance of the pattern T*GGZ*,
 // whereas Leading ones are found by the pattern T*ZG*
 // To evaluate the pattern we map it to the function that evaluates the three bits 
 //     (current, before, & after): 
 //  f[i] = T[i-1](G[i]~Z[i+1] & ~G[i+1]Z[i]) | ~T[i-1](Z[i]~Z[i+1] & G[i]~G[i+1])
 // 
 ////////////////////////////////////////////////////////////////////////////////////
 module poslza(
-    //   parameter SIGNIFICANT_SZ=52;
+    input logic  [3*`NF+6:0] A,     // addend
-    //leading digit anticipator
+    input logic  [2*`NF+1:0] P,     // product
-    //   localparam sz=SIGNIFICANT_SZ+1;
+    output logic [8:0]       PCnt   // normalization shift count for the positive result
    input logic [3*`NF+6:0] A, 
    input logic [2*`NF+1:0] P,
    output logic [8:0] PCnt
    ); 
    // Compute Generate, Propageate and Kill for each bit
    // calculate the propagate (T) and kill (Z) bits
    logic [3*`NF+6:0] T;
    logic [3*`NF+5:0] Z;
    // assign T = A^{{`NF+3{1'b0}}, P, 2'b0};
    // assign Z = ~(A|{{`NF+3{1'b0}}, P, 2'b0});
    assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
    assign Z[3*`NF+5:2*`NF+4] = A[3*`NF+5:2*`NF+4];
    assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
@ -739,7 +474,6 @@ module poslza(
    // Apply function to determine Leading pattern
    logic [3*`NF+6:0] pf;
    assign pf = T^{Z[3*`NF+5:0], 1'b0};
    // assign pf = T^{~Z[3*`NF+5:0], 1'b0};
    logic [8:0] i;
    always_comb begin
@ -751,16 +485,12 @@ module poslza(
 endmodule
 module neglza(
-    //   parameter SIGNIFICANT_SZ=52;
+    input logic  [3*`NF+6:0]    A,      // addend
-    //leading digit anticipator
+    input logic  [3*`NF+6:0]    P,      // product
-    //   localparam sz=SIGNIFICANT_SZ+1;
+    output logic [8:0]          NCnt    // normalization shift count for the negitive result
    input logic [3*`NF+6:0] A, 
    input logic [3*`NF+6:0] P,
    output logic [8:0] NCnt
    ); 
-    // Compute Generate, Propageate and Kill for each bit
+    // calculate the propagate (T) and kill (Z) bits
    logic [3*`NF+6:0] T;
    logic [3*`NF+5:0] Z;
    assign T = A^P;
@ -783,28 +513,27 @@ endmodule
 module normalize(
-    input logic [3*`NF+5:0]   SumM,
+    input logic  [3*`NF+5:0]    SumM,       // the positive sum
-    input logic [`NE-1:0] ZExpM,
+    input logic  [`NE-1:0]      ZExpM,      // exponent of Z
    input logic  [`NE+1:0]      ProdExpM,   // X exponent + Y exponent - bias
-    input logic [8:0] NormCntM,
+    input logic  [8:0]          NormCntM,   // normalization shift count
    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic KillProdM,
+    input logic                 KillProdM,  // is the product set to zero
-    input logic AddendStickyM,
+    input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
    output logic [`NF+2:0]      NormSum,        // normalized sum
-    output logic SumZero,
+    output logic                SumZero,        // is the sum zero
-    output logic NormSumSticky, UfSticky,
+    output logic                NormSumSticky, UfSticky,    // sticky bits
    output logic [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    output logic ResultDenorm
+    output logic                ResultDenorm    // is the result denormalized
 );
    logic [`NE+1:0]     FracLen;            // length of the fraction
    logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
    logic [`NE+1:0]     SumExpTmpMinus1;    // SumExpTmp-1
    logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
+    logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+7:0]   SumShiftedTmp; // sum shifted for normalization
+    logic [3*`NF+7:0]   SumShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0] SumExpTmpTmp;
+    logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
-    logic PreResultDenorm;
+    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic LZAPlus1;
+    logic               LZAPlus1;           // add one to the sum's exponent due to LZA correction
    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
@ -815,87 +544,58 @@ module normalize(
    // determine the length of the fraction based on precision
    assign FracLen = FmtM ? `NF+1 : 13'd24;
    //assign FracLen = `NF;
-    // Determine if the result is denormal
+    // calculate the sum's exponent
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
    // Determine if the result is denormal
    assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
    assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
    // Normalize the sum
-    assign SumShiftedTmp = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
    // LZA correction
-    assign LZAPlus1 = SumShiftedTmp[3*`NF+7];
+    assign LZAPlus1 = SumShifted[3*`NF+7];
-    assign SumShifted =  LZAPlus1 ? SumShiftedTmp[3*`NF+6:1] : SumShiftedTmp[3*`NF+5:0];
+    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
-    assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
+    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
    // Calculate the sticky bit
-    assign NormSumSticky = (|SumShifted[2*`NF+2:0]) | (|SumShifted[136:2*`NF+3]&~FmtM);
+    assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
    assign UfSticky = AddendStickyM | NormSumSticky;
    // Determine sum's exponent
-    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
+    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShifted[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
-// recalculate if the result is denormalized
+    // recalculate if the result is denormalized
-assign ResultDenorm = PreResultDenorm&~SumShiftedTmp[3*`NF+6]&~SumShiftedTmp[3*`NF+7];
+    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
    // // Determine if the sum is zero
    // assign SumZero = ~(|Sum);
    // // determine the length of the fraction based on precision
    // assign FracLen = FmtM ? `NF : 13'd23;
    // //assign FracLen = `NF;
    // // Determine if the result is denormal
    // assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} + 1 - (`NF+4));
    // assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
    // assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
    // // Determine the shift needed for denormal results
    // //  - if not denorm add 1 to shift out the leading 1
    // assign DenormShift = ResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
    // // Normalize the sum
    // assign SumShifted = SumZero ? 0 : Sum << NormCnt+DenormShift; //*** fix mux's with constants in them
    // assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
    // // Calculate the sticky bit
    // assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+2:0]) : (|SumShifted[136:0]);
    // assign UfSticky = AddendStickyM | NormSumSticky;
    // // Determine sum's exponent
    // assign SumExp = SumZero ? 0 : //***again fix mux
    //              ResultDenorm ? 0 :
    //              SumExpTmp;
 endmodule
 module fmaround(
    input logic             FmtM,       // precision 1 = double 0 = single
-    input logic [2:0] FrmM,
+    input logic  [2:0]      FrmM,       // rounding mode
-    input logic UfSticky,
+    input logic             UfSticky,   // sticky bit for underlow calculation
    output logic Sticky,
    input logic  [`NF+2:0]  NormSum,    // normalized sum
-    input logic AddendStickyM,
+    input logic             AddendStickyM,  // addend's sticky bit
-    input logic NormSumSticky,
+    input logic             NormSumSticky,  // normalized sum's sticky bit
-    input logic ZZeroM,
+    input logic             ZZeroM,         // is Z zero
-    input logic InvZM,
+    input logic             InvZM,          // invert Z
    input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
-    input logic ResultSgn,
+    input logic             ResultSgn,      // the result's sign
-    output logic CalcPlus1, Plus1, UfPlus1, Minus1,
+    output logic            CalcPlus1, Plus1, UfPlus1, Minus1,  // do you add or subtract on from the result
    output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
    output logic [`NF-1:0]  ResultFrac,         // Result fraction
    output logic [`NE-1:0]  ResultExp,          // Result exponent
-    output logic Round, Guard, UfRound, UfLSBNormSum
+    output logic            Sticky,             // sticky bit
    output logic            Round, Guard, UfRound, UfLSBNormSum // bits needed to calculate rounding
 );
-    logic LSBNormSum;
+    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic UfGuard;
+    logic           UfGuard;            // gaurd bit used to caluculate underflow
-    logic UfCalcPlus1, CalcMinus1;
+    logic           UfCalcPlus1, CalcMinus1;    // do you add or subtract on from the result
-    logic [`FLEN:0] RoundAdd; //*** move this up 
+    logic [`FLEN:0] RoundAdd;           // how much to add to the result
-    logic [`NF-1:0] NormSumTruncated;
+    logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimed to fit the mantissa
    ///////////////////////////////////////////////////////////////////////////////
    // Rounding
@ -997,15 +697,15 @@ module fmaflags(
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic  [`NE+1:0]      FullResultExp,          // ResultExp with bits to determine sign and overflow
    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
-    input logic ZSgnEffM, PSgnM,
+    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
-    input logic Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1,
+    input logic                 Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
    input logic                 FmtM,                   // precision 1 = double 0 = single
-    output logic Invalid, Overflow, Underflow,
+    output logic                Invalid, Overflow, Underflow, // flags used to select the result
-    output logic [4:0] FMAFlgM
+    output logic [4:0]          FMAFlgM // FMA flags
 );
    logic [`NE+1:0]     MaxExp;     // maximum value of the exponent
-    logic SigNaN;
+    logic               SigNaN;     // is an input a signaling NaN
-    logic UnderflowFlag, Inexact;
+    logic               UnderflowFlag, Inexact; // flags
    ///////////////////////////////////////////////////////////////////////////////
    // Flags
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -225,7 +225,7 @@ module fpu (
 		 .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
     .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
     .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-		 .FOpCtrlE, .FOpCtrlM, 
+		 .FOpCtrlE,
 		 .FmtE, .FmtM, .FrmM, 
     // outputs:
     .FMAFlgM, .FMAResM);
@ -257,19 +257,7 @@ module fpu (
                   // outputs:
 			             .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
-	
+	// convert from signle to double and vice versa
 	// add/FP <-> FP convert
  //    - computation is done in two stages
  //    - contains some E/M pipleine registers
  //*** remove uneeded logic
  //*** change to use the unpacking unit if possible
 // 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
 //    .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
 //    .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
 //                   // outputs:
 //                   .CvtFpResM, .CvtFpFlgM);
 	cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
 	// compare unit