From 4b50ffac916130ee3fc8727a785dee25fdd9cd32 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 23 Dec 2022 17:01:34 -0600
Subject: [PATCH] reworked negative sticky bit handling in fma

---
 pipelined/regression/wave-fpu.do    | 18 ------------------
 pipelined/src/fpu/fma/fmaadd.sv     | 15 +++++++++------
 pipelined/src/fpu/fma/fmaalign.sv   |  5 ++---
 pipelined/testbench/testbench-fp.sv |  8 ++++----
 4 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 87545e31..05ccb215 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -9,7 +9,6 @@ add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/FDivBusyE
-add wave -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
@@ -20,22 +19,5 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/WC
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/WS
-#add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/WCA
-#add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/WSA
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/U
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/UM
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/UNext
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/UMNext
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/interations[0]/stage/fdivsqrtstage/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/interations[0]/stage/fdivsqrtstage/otfc/otfc2/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/interations[0]/stage/fdivsqrtstage/qsel/qsel2/*
-# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtiter/interations[0]/fdivsqrtstage/stage/genblk1/qsel4/*
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtpreproc/*
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtpreproc/expcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/fdivsqrt/fdivsqrt/fdivsqrtfsm/*
-add wave -group {Sqrt} -noupdate -recursive /testbenchfp/fdivsqrt/fdivsqrt/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv
index c040c241..adb8f450 100644
--- a/pipelined/src/fpu/fma/fmaadd.sv
+++ b/pipelined/src/fpu/fma/fmaadd.sv
@@ -46,7 +46,9 @@ module fmaadd(
     output logic [3*`NF+5:0]    Sm           // the positive sum
 );
     logic [3*`NF+5:0]    PreSum, NegPreSum; // possibly negitive sum
+    logic [3*`NF+5:0]    PreSumdebug, NegPreSumdebug; // possibly negitive sum
     logic                NegSum;        // was the sum negitive
+    logic                NegSumdebug;        // was the sum negitive
 
     ///////////////////////////////////////////////////////////////////////////////
     // Addition
@@ -58,12 +60,13 @@ module fmaadd(
     assign PmKilled = KillProd ? '0 : Pm;
     // Do the addition
     //      - calculate a positive and negitive sum in parallel
-    //              Zsticky             Psticky
-    // PreSum    -1 = don't add 1     +1 = add 2
-    // NegPreSum +1 = add 2           -1 = don't add 1
-    // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
-    assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
-    assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+4)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
+    // if a small negative number was killed in the alignment stage, one needs to be subtracted from the sum
+    //      prod - addend where some of the addend is put into the sticky bit: then don't add the +1 from negation
+    //          ie ~(InvA&ZmSticky&~KillProd)&InvA = (~ZmSticky|KillProd)&InvA
+    //      addend - prod where the product is killed (and not exactly zero): then don't add the +1 from negation
+    //          ie ~(InvA&ZmSticky&KillProd)&InvA = (~ZmSticky|~KillProd)&InvA
+    assign {NegSum, PreSum} = {{`NF+3{1'b0}}, PmKilled, 2'b0} + {InvA, AmInv} + {{3*`NF+6{1'b0}}, (~ZmSticky|KillProd)&InvA};
+    assign NegPreSum = Am + {{`NF+2{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+3)'(0), (~ZmSticky|~KillProd)&InvA, 2'b0};
      
     // Choose the positive sum and accompanying LZA result.
     assign Sm = NegSum ? NegPreSum : PreSum;
diff --git a/pipelined/src/fpu/fma/fmaalign.sv b/pipelined/src/fpu/fma/fmaalign.sv
index 6c657738..e423c19d 100644
--- a/pipelined/src/fpu/fma/fmaalign.sv
+++ b/pipelined/src/fpu/fma/fmaalign.sv
@@ -55,11 +55,10 @@ module fmaalign(
     // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
     assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
 
-    // Defualt Addition without shifting
+    // Default addition with only the initial left shift
     //          |   54'b0    |  106'b(product)  | 2'b0 |
     //          | addnend |
 
-    // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
     assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
     
     assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
@@ -77,7 +76,7 @@ module fmaalign(
             ZmSticky = ~(XZero|YZero);
 
         // If the addend is too small to effect the addition        
-        //      - The addend has to shift two past the end of the addend to be considered too small
+        //      - The addend has to shift two past the end of the product to be considered too small
         //      - The 2 extra bits are needed for rounding
 
         //          |   54'b0    |  106'b(product)  | 2'b0 |
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 0d310089..b5c93dc9 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -689,8 +689,8 @@ module testbenchfp;
             .Xe(Xe), .Ye(Ye), .Ze(Ze), 
             .Xm(Xm), .Ym(Ym), .Zm(Zm),
             .XZero, .YZero, .ZZero, .Ss, .Se,
-            .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps,
-            .Pe, .ZmSticky, .KillProd); 
+            .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .InvA, .SCnt, .As, .Ps,
+            .ZmSticky); 
   end
               
   postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
@@ -700,8 +700,8 @@ module testbenchfp;
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se),
-              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+              .FmaZmS(ZmSticky), .FmaSe(Se),
+              .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
               .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   
   if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt