removed the second bit from fma alignment shift

2022-12-30 12:07:44 -06:00 · 2022-12-30 12:07:44 -06:00 · aca6f0d4e6
commit aca6f0d4e6
parent 3adb8efb2b
11 changed files with 137 additions and 131 deletions
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -104,9 +104,9 @@
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+7) ? (`QLEN+`NF+1) : (3*`NF+7))//change
+`define NORMSHIFTSZ ((`QLEN+`NF+1) > (3*`NF+6) ? (`QLEN+`NF+1) : (3*`NF+6))
 `define LOGNORMSHIFTSZ ($clog2(`NORMSHIFTSZ))
-`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? (`DIVRESLEN+`NF) : (3*`NF+5))//change
+`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+6) ? (`DIVRESLEN+`NF) : (3*`NF+4))

 // division constants
 `define RADIX 32'h4
--- a/pipelined/src/fpu/fma/fma.sv
+++ b/pipelined/src/fpu/fma/fma.sv
@ -31,27 +31,37 @@
 `include "wally-config.vh"

 module fma(
-    input logic                 Xs, Ys, Zs,    // input's signs
-    input logic  [`NE-1:0]      Xe, Ye, Ze,    // input's biased exponents in B(NE.0) format
-    input logic  [`NF:0]        Xm, Ym, Zm,    // input's significands in U(0.NF) format
-    input logic                 XZero, YZero, ZZero, // is the input zero
-    input logic  [2:0]          OpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    output logic                ASticky,  // sticky bit that is calculated during alignment
-    output logic [3*`NF+4:0]    Sm,//change           // the positive sum's significand
-    output logic                InvA,          // Was A inverted for effective subtraction (P-A or -P+A)
-    output logic                As,       // the aligned addend's sign (modified Z sign for other opperations)
-    output logic                Ps,          // the product's sign
-    output logic                Ss,          // the sum's sign
-    output logic [`NE+1:0]      Se,
-    output logic [$clog2(3*`NF+6)-1:0]          SCnt//change        // normalization shift count
+    input logic                         Xs, Ys, Zs, // input's signs
+    input logic  [`NE-1:0]              Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
+    input logic  [`NF:0]                Xm, Ym, Zm, // input's significands in U(0.NF) format
+    input logic                         XZero, YZero, ZZero, // is the input zero
+    input logic  [2:0]                  OpCtrl,   // operation control
+    output logic                        ASticky,  // sticky bit that is calculated during alignment
+    output logic [3*`NF+3:0]            Sm,   // the positive sum's significand
+    output logic                        InvA, // Was A inverted for effective subtraction (P-A or -P+A)
+    output logic                        As,   // the aligned addend's sign (modified Z sign for other operations)
+    output logic                        Ps,   // the product's sign
+    output logic                        Ss,   // the sum's sign
+    output logic [`NE+1:0]              Se,   // the sum's exponent
+    output logic [$clog2(3*`NF+5)-1:0]  SCnt  // normalization shift count
 );

-    logic [2*`NF+1:0]   Pm;           // the product's significand in U(2.2Nf) format
-    logic [3*`NF+4:0]   Am;//change     // addend aligned's mantissa for addition in U(NF+5.2NF+1)
-    logic [3*`NF+4:0]   AmInv; //change   // aligned addend's mantissa possibly inverted
-    logic [2*`NF+1:0]   PmKilled;      // the product's mantissa possibly killed
-    logic               KillProd;  // set the product to zero before addition if the product is too small to matter
-    logic [`NE+1:0]     Pe;       // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
+    //  OpCtrl:
+    //    Fma: {not multiply-add?, negate prod?, negate Z?}
+    //        000 - fmadd
+    //        001 - fmsub
+    //        010 - fnmsub
+    //        011 - fnmadd
+    //        100 - mul
+    //        110 - add
+    //        111 - sub
+
+    logic [2*`NF+1:0]   Pm;          // the product's significand in U(2.2Nf) format
+    logic [3*`NF+3:0]   Am;         // addend aligned's mantissa for addition in U(NF+4.2NF)
+    logic [3*`NF+3:0]   AmInv;      // aligned addend's mantissa possibly inverted
+    logic [2*`NF+1:0]   PmKilled;   // the product's mantissa possibly killed U(2.2Nf)
+    logic               KillProd;   // set the product to zero before addition if the product is too small to matter
+    logic [`NE+1:0]     Pe;         // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign

    ///////////////////////////////////////////////////////////////////////////////
    // Calculate the product
@ -68,25 +78,23 @@ module fma(
    // multiplication of the mantissa's
    fmamult mult(.Xm, .Ym, .Pm);
   
-    ///////////////////////////////////////////////////////////////////////////////
-    // Alignment shifter
-    ///////////////////////////////////////////////////////////////////////////////
    // calculate the signs and take the opperation into account
    fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);

+    ///////////////////////////////////////////////////////////////////////////////
+    // Alignment shifter
+    ///////////////////////////////////////////////////////////////////////////////
    fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
                .Am, .ASticky, .KillProd);
                        
-
-
    // ///////////////////////////////////////////////////////////////////////////////
    // // Addition/LZA
    // ///////////////////////////////////////////////////////////////////////////////
        
    fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);

-    //change
-    fmalza #(3*`NF+5) lza(.A(AmInv), .Pm({PmKilled, 1'b0, InvA&Ps&ASticky&KillProd}), .Cin(InvA & ~(ASticky & ~KillProd)), .sub(InvA), .SCnt);
+    fmalza #(3*`NF+4) lza(.A(AmInv), .Pm({PmKilled, InvA&Ps&ASticky&KillProd}), .Cin(InvA & ~(ASticky & ~KillProd)), .sub(InvA), .SCnt);
+    
 endmodule


--- a/pipelined/src/fpu/fma/fmaadd.sv
+++ b/pipelined/src/fpu/fma/fmaadd.sv
@ -31,7 +31,7 @@
 `include "wally-config.vh"

 module fmaadd(
-    input logic  [3*`NF+4:0]    Am, //change // aligned addend's mantissa for addition in U(NF+5.2NF+1)
+    input logic  [3*`NF+3:0]    Am, // aligned addend's mantissa for addition in U(NF+4.2NF)
    input logic  [2*`NF+1:0]    Pm,       // the product's mantissa
    input logic                 Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
    input logic                InvA,          // invert the aligned addend
@ -39,13 +39,13 @@ module fmaadd(
    input logic                 ASticky,
    input logic  [`NE-1:0]      Ze,
    input logic  [`NE+1:0]      Pe,
-    output logic [3*`NF+4:0]    AmInv,//change // aligned addend possibly inverted
+    output logic [3*`NF+3:0]    AmInv, // aligned addend possibly inverted
    output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
    output logic                Ss,          
    output logic [`NE+1:0]      Se,
-    output logic [3*`NF+4:0]    Sm//change           // the positive sum
+    output logic [3*`NF+3:0]    Sm          // the positive sum
 );
-    logic [3*`NF+4:0]    PreSum, NegPreSum;//change // possibly negitive sum
+    logic [3*`NF+3:0]    PreSum, NegPreSum; // possibly negative sum
    logic [3*`NF+5:0]    PreSumdebug, NegPreSumdebug; // possibly negitive sum
    logic                NegSum;        // was the sum negitive
    logic                NegSumdebug;        // was the sum negitive
@ -66,8 +66,8 @@ module fmaadd(
    //      addend - prod where product is killed (and not exactly zero) then don't add +1 from negation 
    //          ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
    //          in this case this result is only ever selected when InvA=1 so we can remove &InvA
-    assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+5{1'b0}}, (~ASticky|KillProd)&InvA};//change
-    assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 2'b0};//change
+    assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
+    assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
     
    // Choose the positive sum and accompanying LZA result.
    assign Sm = NegSum ? NegPreSum : PreSum;
--- a/pipelined/src/fpu/fma/fmaalign.sv
+++ b/pipelined/src/fpu/fma/fmaalign.sv
@ -35,16 +35,15 @@ module fmaalign(
    input logic  [`NE-1:0]      Xe, Ye, Ze,      // biased exponents in B(NE.0) format
    input logic  [`NF:0]        Zm,      // significand in U(0.NF) format]
    input logic                 XZero, YZero, ZZero, // is the input zero
-    output logic [3*`NF+4:0]    Am,//change // addend aligned for addition in U(NF+5.2NF+1)
+    output logic [3*`NF+3:0]    Am, // addend aligned for addition in U(NF+4.2NF)
    output logic                ASticky,  // Sticky bit calculated from the aliged addend
    output logic                KillProd       // should the product be set to zero
 );

    logic [`NE+1:0]     ACnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
-    logic [4*`NF+4:0]   ZmShifted;//change        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
-    logic [4*`NF+4:0]   ZmPreshifted;//change     // input to the alignment shifter U(NF+5.3NF+1)
+    logic [4*`NF+3:0]   ZmShifted;        // output of the alignment shifter including sticky bits U(NF+4.3NF)
+    logic [4*`NF+3:0]   ZmPreshifted;     // input to the alignment shifter U(NF+4.3NF)
    logic KillZ;
-    logic PmSticky, tmpZmSticky;

    ///////////////////////////////////////////////////////////////////////////////
    // Alignment shifter
@ -57,38 +56,38 @@ module fmaalign(
    assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};

    // Defualt Addition with only inital left shift
-    //          |   53'b0    |  106'b(product)  | 2'b0 |
+    //          |   53'b0    |  106'b(product)  | 1'b0 |
    //          | addnend |

-    assign ZmPreshifted = {Zm,(3*`NF+4)'(0)}; //change
+    assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
    
    assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
-    assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(4));//change
+    assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));

    always_comb
        begin
        
        // If the product is too small to effect the sum, kill the product

-        //          |   54'b0    |  106'b(product)  | 2'b0 |
+        //          |   53'b0    |  106'b(product)  | 1'b0 |
        //  | addnend |
        if (KillProd) begin
-            ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+2)'(0)};//change
+            ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
            ASticky = ~(XZero|YZero);

        // If the addend is too small to effect the addition        
        //      - The addend has to shift two past the end of the product to be considered too small
        //      - The 2 extra bits are needed for rounding

-        //          |   54'b0    |  106'b(product)  | 2'b0 |
+        //          |   53'b0    |  106'b(product)  | 1'b0 |
        //                                                      | addnend |
        end else if (KillZ)  begin
            ZmShifted = 0;
            ASticky = ~ZZero;

        // If the Addend is shifted right
-        //          |   54'b0    |  106'b(product)  | 2'b0 |
-        //                                  | addnend |
+        //          |   53'b0    |  106'b(product)  | 1'b0 |
+        //                                    | addnend |
        end else begin
            ZmShifted = ZmPreshifted >> ACnt;
            ASticky = |(ZmShifted[`NF-1:0]); 
@ -96,7 +95,7 @@ module fmaalign(
        end
    end

-    assign Am = ZmShifted[4*`NF+4:`NF];//change
+    assign Am = ZmShifted[4*`NF+3:`NF];

 endmodule

--- a/pipelined/src/fpu/fma/fmalza.sv
+++ b/pipelined/src/fpu/fma/fmalza.sv
@ -31,18 +31,18 @@
 `include "wally-config.vh"

 module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
-    input logic [WIDTH-1:0] 	       A, // addend
-    input logic [2*`NF+3:0] 	       Pm, // product
-    input logic 		       Cin, // carry in
-    input logic sub,
-    output logic [$clog2(WIDTH+1)-1:0] SCnt   // normalization shift count for the positive result
+    input logic [WIDTH-1:0]             A,      // addend
+    input logic [2*`NF+2:0]             Pm,     // product
+    input logic 		                Cin,    // carry in
+    input logic                         sub,
+    output logic [$clog2(WIDTH+1)-1:0]  SCnt    // normalization shift count for the positive result
    ); 

   logic [WIDTH:0] 	       F;
   logic [WIDTH-1:0]  B, P, G, K;
    logic [WIDTH-1:0] Pp1, Gm1, Km1;

-    assign B = {{(`NF+1){1'b0}}, Pm};//change // Zero extend product
+    assign B = {{(`NF+1){1'b0}}, Pm}; // Zero extend product

    assign P = A^B;
    assign G = A&B;
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -109,14 +109,14 @@ module fpu (
   logic 		      XExpMaxE;                           // is the exponent all ones (max value)

   // Fma Signals
-   logic [3*`NF+4:0] SmE, SmM;//change             
-   logic 			   ZmStickyE, ZmStickyM;
+   logic [3*`NF+3:0] SmE, SmM;            
+   logic 			   FmaAStickyE, FmaAStickyM;
   logic [`NE+1:0]   SeE,SeM;
   logic 			   InvAE, InvAM;
   logic 			   AsE, AsM;
   logic 			   PsE, PsM;
   logic 			   SsE, SsM;
-   logic [$clog2(3*`NF+6)-1:0] SCntE, SCntM;//change
+   logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM;

   // Cvt Signals
   logic [`NE:0]           CeE, CeM;    // the calculated expoent
@ -258,7 +258,7 @@ module fpu (
            .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
            .Sm(SmE), 
            .InvA(InvAE), .SCnt(SCntE), 
-            .ASticky(ZmStickyE)); 
+            .ASticky(FmaAStickyE)); 

   // divide and squareroot
   //    - fdiv
@ -352,10 +352,10 @@ module fpu (
            {XsE, YsE, XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
            {XsM, YsM, XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
   flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
-   flopenrc #(3*`NF+5) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);//change 
-  flopenrc #($clog2(3*`NF+6)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, //change
-                           {ZmStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
-                           {ZmStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
+   flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
+  flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
+                           {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
+                           {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
   flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                           {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
                           {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
@ -375,7 +375,7 @@ module fpu (
   assign FpLoadStoreM = FResSelM[1];

   postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), 
-                           .FmaZmS(ZmStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
+                           .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), /*.DivDone(DivDoneM), */
                           .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
                           .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
--- a/pipelined/src/fpu/postproc/fmashiftcalc.sv
+++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv
@ -30,18 +30,18 @@
 `include "wally-config.vh"

 module fmashiftcalc(
-    input logic  [3*`NF+4:0]            FmaSm,//change       // the positive sum
-    input logic  [$clog2(3*`NF+6)-1:0]  FmaSCnt,//change   // normalization shift count
-    input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
-    input logic [`NE+1:0] FmaSe,
-    output logic [`NE+1:0]              NormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
-    output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
-    output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
-    output logic [$clog2(3*`NF+6)-1:0]  FmaShiftAmt,//change   // normalization shift count
-    output logic [3*`NF+6:0]            FmaShiftIn//change        // is the sum zero
+    input logic  [3*`NF+3:0]            FmaSm,      // the positive sum
+    input logic  [$clog2(3*`NF+5)-1:0]  FmaSCnt,    // normalization shift count
+    input logic  [`FMTBITS-1:0]         Fmt,        // precision 1 = double 0 = single
+    input logic  [`NE+1:0]              FmaSe,      // sum's exponent
+    output logic [`NE+1:0]              NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
+    output logic                        FmaSZero,   // is the sum zero
+    output logic                        FmaPreResultDenorm, // is the result denormalized - calculated before LZA correction
+    output logic [$clog2(3*`NF+5)-1:0]  FmaShiftAmt,    // normalization shift count
+    output logic [3*`NF+5:0]            FmaShiftIn      // shift input
 );
-    logic [`NE+1:0]             PreNormSumExp;       // the exponent of the normalized sum with the `FLEN bias
-    logic [`NE+1:0] BiasCorr;
+    logic [`NE+1:0] PreNormSumExp;  // the exponent of the normalized sum with the `FLEN bias
+    logic [`NE+1:0] BiasCorr;       // correction for bias

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
@ -50,7 +50,7 @@ module fmashiftcalc(
    // Determine if the sum is zero
    assign FmaSZero = ~(|FmaSm);
    // calculate the sum's exponent
-    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+6)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);//change
+    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);

    //convert the sum's exponent into the proper percision
    if (`FPSIZES == 1) begin
@ -150,7 +150,7 @@ module fmashiftcalc(
    //  - shift once if killing a product and the result is denormalized
    assign FmaShiftIn = {2'b0, FmaSm};
    if (`FPSIZES == 1)
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2): FmaSCnt+1;//change
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
    else
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+6)-1:0]+($clog2(3*`NF+6))'(`NF+2)+BiasCorr[$clog2(3*`NF+6)-1:0]: FmaSCnt+1;//change
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
 endmodule
--- a/pipelined/src/fpu/postproc/postprocess.sv
+++ b/pipelined/src/fpu/postproc/postprocess.sv
@ -32,28 +32,27 @@

 module postprocess (
    // general signals
-    input logic                             Xs, Ys,  // input signs
+    input logic                             Xs, Ys,     // input signs
    input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
-    input logic  [2:0]                      Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic  [`FMTBITS-1:0]             Fmt,       // precision 1 = double 0 = single
-    input logic  [2:0]                      OpCtrl,       // choose which opperation (look below for values)
+    input logic  [2:0]                      Frm,        // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [`FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
+    input logic  [2:0]                      OpCtrl,     // choose which operation (look below for values)
    input logic                             XZero, YZero, ZZero, // inputs are zero
    input logic                             XInf, YInf, ZInf,    // inputs are infinity
    input logic                             XNaN, YNaN, ZNaN,    // inputs are NaN
    input logic                             XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
-    input logic                             ZDenorm, // is the original precision denormalized
-    input logic  [1:0]                      PostProcSel, // select result to be written to fp register
+    input logic                             ZDenorm,        // is the original precision denormalized
+    input logic  [1:0]                      PostProcSel,    // select result to be written to fp register
    //fma signals
-    input logic                             FmaAs,   // the modified Z sign - depends on instruction
-    input logic                             FmaPs,      // the product's sign
-    input logic  [`NE+1:0]                  FmaSe,
-    input logic  [3*`NF+4:0]                FmaSm,//change      // the positive sum
-    input logic                             FmaZmS,  // sticky bit that is calculated during alignment
-    input logic                             FmaSs,
-    input logic  [$clog2(3*`NF+6)-1:0]      FmaSCnt,//change   // the normalization shift count
+    input logic                             FmaAs,  // the modified Z sign - depends on instruction
+    input logic                             FmaPs,  // the product's sign
+    input logic  [`NE+1:0]                  FmaSe,  // the sum's exponent
+    input logic  [3*`NF+3:0]                FmaSm,  // the positive sum
+    input logic                             FmaASticky, // sticky bit that is calculated during alignment
+    input logic                             FmaSs,  // the sum's sign
+    input logic  [$clog2(3*`NF+5)-1:0]      FmaSCnt,   // the normalization shift count
    //divide signals
    input logic                             DivS,
-//    input logic                             DivDone,
    input logic  [`NE+1:0]                  DivQe,
    input logic  [`DIVb:0]                  DivQm,
    // conversion signals
@ -89,10 +88,10 @@ module postprocess (
    // fma signals
    logic [`NE+1:0] FmaMe;     // exponent of the normalized sum
    logic FmaSZero;        // is the sum zero
-    logic [3*`NF+6:0] FmaShiftIn;//change        // shift input
+    logic [3*`NF+5:0] FmaShiftIn;        // shift input
    logic [`NE+1:0] NormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
    logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic [$clog2(3*`NF+6)-1:0] FmaShiftAmt;//change   // normalization shift count
+    logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;   // normalization shift count
    // division singals
    logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;
    logic [`NORMSHIFTSZ-1:0] DivShiftIn;
@ -152,8 +151,8 @@ module postprocess (
    always_comb
        case(PostProcSel)
            2'b10: begin // fma
-                ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+6){1'b0}}, FmaShiftAmt};//change
-                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+7){1'b0}}};//change
+                ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
+                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
            end
            2'b00: begin // cvt
                ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
@ -193,7 +192,7 @@ module postprocess (
                          
    roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);

-    round round(.OutFmt, .Frm, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
+    round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
                .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
                .DivS, //.DivDone,
                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .S, .R, .G, .Me);
--- a/pipelined/src/fpu/postproc/round.sv
+++ b/pipelined/src/fpu/postproc/round.sv
@ -48,7 +48,7 @@ module round(
    input logic                     CvtResDenormUf,
    input logic                     CvtResUf,
    input logic  [`CORRSHIFTSZ-1:0] Mf,
-    input logic                     FmaZmS,  // addend's sticky bit
+    input logic                     FmaASticky,  // addend's sticky bit
    input logic  [`NE+1:0]          FmaMe,         // exponent of the normalized sum
    input logic                     Ms,      // the result's sign
    input logic  [`NE:0]            CvtCe,    // the calculated expoent
@ -175,7 +175,7 @@ module round(

    // only add the Addend sticky if doing an FMA opperation
    //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign S = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
+    assign S = FmaASticky&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
    
    // determine round and LSB of the rounded value
    //      - underflow round bit is used to determint the underflow flag
--- a/pipelined/src/fpu/postproc/shiftcorrection.sv
+++ b/pipelined/src/fpu/postproc/shiftcorrection.sv
@ -43,7 +43,7 @@ module shiftcorrection(
    output logic [`NE+1:0]          Qe,
    output logic [`NE+1:0]          FmaMe         // exponent of the normalized sum
 );
-    logic [3*`NF+4:0]      CorrSumShifted;//change     // the shifted sum after LZA correction
+    logic [3*`NF+3:0]      CorrSumShifted;     // the shifted sum after LZA correction
    logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
    logic                  ResDenorm;    // is the result denormalized
    logic                  LZAPlus1; // add one or two to the sum's exponent due to LZA correction
@ -56,7 +56,7 @@ module shiftcorrection(
    assign CorrQmShifted = (LZAPlus1|(DivQe==1&~LZAPlus1)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
    // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
    always_comb
-        if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+5){1'b0}}};//change
+        if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
        else if (DivOp&~DivResDenorm)   Mf = CorrQmShifted;
        else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
    // Determine sum's exponent
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -53,39 +53,39 @@ module testbenchfp;
  logic [`FLEN*4+7:0] TestVectors[8388609:0];     // list of test vectors

  logic [1:0]           FmtVal;          // value of the current Fmt
-  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
+  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // value of the current Unit/OpCtrl/FrmVal
  logic                 WriteIntVal;                // value of the current WriteInt
  logic [`FLEN-1:0]     X, Y, Z;                    // inputs read from TestFloat
  logic [`XLEN-1:0]     SrcA;                       // integer input
  logic [`FLEN-1:0]	    Ans;                        // correct answer from TestFloat
-  logic [`FLEN-1:0]	    Res;                                                // result from other units
-  logic [4:0]	 	        AnsFlg;                                             // correct flags read from testfloat
-  logic [4:0]	 	        ResFlg, Flg;                                                            // Result flags
-  logic	[`FMTBITS-1:0]  ModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
-  logic [`FLEN-1:0]     FpRes, FpCmpRes;  // Results from each unit
-  logic [`XLEN-1:0]     IntRes, CmpRes;  // Results from each unit
+  logic [`FLEN-1:0]	    Res;                        // result from other units
+  logic [4:0]	 	        AnsFlg;                     // correct flags read from testfloat
+  logic [4:0]	 	        ResFlg, Flg;                // Result flags
+  logic	[`FMTBITS-1:0]  ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
+  logic [`FLEN-1:0]     FpRes, FpCmpRes;            // Results from each unit
+  logic [`XLEN-1:0]     IntRes, CmpRes;             // Results from each unit
  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
  logic                 AnsNaN, ResNaN, NaNGood;
-  logic                 Xs, Ys, Zs;                     // sign of the inputs
-  logic [`NE-1:0]       Xe, Ye, Ze;                     // exponent of the inputs
-  logic [`NF:0]         Xm, Ym, Zm;                     // mantissas of the inputs
-  logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
-  logic                 XSNaN, YSNaN, ZSNaN;                  // is the input a signaling NaN
-  logic                 XDenorm, ZDenorm;            // is the input denormalized
-  logic                 XInf, YInf, ZInf;                   // is the input infinity
-  logic                 XZero, YZero, ZZero;                // is the input zero
-  logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
-  logic  [`CVTLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
-  logic        IntZero;
-  logic CvtResSgnE;
-  logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
+  logic                 Xs, Ys, Zs;                 // sign of the inputs
+  logic [`NE-1:0]       Xe, Ye, Ze;                 // exponent of the inputs
+  logic [`NF:0]         Xm, Ym, Zm;                 // mantissas of the inputs
+  logic                 XNaN, YNaN, ZNaN;           // is the input NaN
+  logic                 XSNaN, YSNaN, ZSNaN;        // is the input a signaling NaN
+  logic                 XDenorm, ZDenorm;           // is the input denormalized
+  logic                 XInf, YInf, ZInf;           // is the input infinity
+  logic                 XZero, YZero, ZZero;        // is the input zero
+  logic                 XExpMax, YExpMax, ZExpMax;  // is the input's exponent all ones  
+  logic  [`CVTLEN-1:0]  CvtLzcInE;                  // input to the Leading Zero Counter (priority encoder)
+  logic                 IntZero;
+  logic                 CvtResSgnE;
+  logic [`NE:0]         CvtCalcExpE;    // the calculated exponent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`DIVb:0] Quot;
-  logic CvtResDenormUfE;
-  logic DivStart, FDivBusyE, OldFDivBusyE;
-  logic reset = 1'b0;
+	logic [`DIVb:0]       Quot;
+  logic                 CvtResDenormUfE;
+  logic                 DivStart, FDivBusyE, OldFDivBusyE;
+  logic                 reset = 1'b0;
  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [`DURLEN-1:0] Dur;
+  logic [`DURLEN-1:0]   Dur;

  // in-between FMA signals
  logic                 Mult;
@ -94,17 +94,17 @@ module testbenchfp;
  logic [`NE+1:0]	      Se;
  logic 				        ASticky;
  logic 					      KillProd; 
-  logic [$clog2(3*`NF+6)-1:0]	SCnt;
-  logic [3*`NF+4:0]	    Sm;       
+  logic [$clog2(3*`NF+5)-1:0]	SCnt;
+  logic [3*`NF+3:0]	    Sm;       
  logic 			          InvA;
  logic 			          NegSum;
  logic 			          As;
  logic 			          Ps;
-  logic       DivSticky;
-  logic       DivDone;
-  logic       DivNegSticky;
-  logic [`NE+1:0] DivCalcExp;
-  logic divsqrtop;
+  logic                 DivSticky;
+  logic                 DivDone;
+  logic                 DivNegSticky;
+  logic [`NE+1:0]       DivCalcExp;
+  logic                 divsqrtop;


  ///////////////////////////////////////////////////////////////////////////////////////////////