From d058ec632942da35ca89cac6ec41dfdca61a6547 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 24 Jun 2022 19:41:40 +0000
Subject: [PATCH 01/23] added denormal input handling - radix 4

---
 pipelined/src/fpu/divshiftcalc.sv   |  4 ++--
 pipelined/src/fpu/flags.sv          |  4 ++--
 pipelined/src/fpu/fpu.sv            |  8 ++++---
 pipelined/src/fpu/postprocess.sv    |  7 ++++--
 pipelined/src/fpu/round.sv          | 16 ++++++++-----
 pipelined/srt/srt-radix4.sv         | 35 +++++++++++++++++++----------
 pipelined/testbench/testbench-fp.sv | 13 ++++++-----
 7 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 57022e5ae..398e8f467 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -2,9 +2,9 @@
 
 module divshiftcalc(
     input logic  [`DIVLEN+2:0] Quot,
-    input logic  [`NE:0] DivCalcExpM,
+    input logic  [`NE+1:0] DivCalcExpM,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
-    output logic [`NE:0] CorrDivExp
+    output logic [`NE+1:0] CorrDivExp
 );
     
     assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 3268aa1fd..122df8b21 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -133,8 +133,8 @@ module flags(
 
     assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
 
-
-    assign DivByZero = YZeroM&DivOp;  
+    // if dividing by zero and not 0/0
+    assign DivByZero = YZeroM&DivOp&~XZeroM;  
 
     // Combine flags
     //      - to integer results do not set the underflow or overflow flags
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index ff83079a8..2f43b27d4 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -124,7 +124,9 @@ module fpu (
    
    //divide signals
    logic [`DIVLEN+2:0] Quot;
-   logic [`NE:0] DivCalcExpM;
+   logic [`NE+1:0] DivCalcExpM;
+   logic DivNegStickyM;
+   logic DivStickyM;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -358,8 +360,8 @@ module fpu (
    postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
                            .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
                            .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
-                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
-                           .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, 
+                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
+                           .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM,
                            .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
 
    // FPU flag selection - to privileged
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index b77b013d1..9516e223f 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -51,8 +51,10 @@ module postprocess(
     input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
     input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic [`NE:0]           DivCalcExpM,    // the calculated expoent
+    input logic [`NE+1:0]           DivCalcExpM,    // the calculated expoent
     input logic CvtResDenormUfM,
+    input logic DivStickyM,
+    input logic DivNegStickyM,
 	input logic [`LOGCVTLEN-1:0] CvtShiftAmtM,  // how much to shift by
     input logic                   CvtResSgnM,     // the result's sign
     input logic             FWriteIntM,     // is fp->int (since it's writting to the integer register)
@@ -94,7 +96,7 @@ module postprocess(
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
     logic [`NE+1:0] RoundExp;
-    logic [`NE:0] CorrDivExp;
+    logic [`NE+1:0] CorrDivExp;
     logic [1:0] NegResMSBS;
     logic CvtOp;
     logic FmaOp;
@@ -179,6 +181,7 @@ module postprocess(
 
     round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
                 .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
+                .DivStickyM, .DivNegStickyM,
                 .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
 
     ///////////////////////////////////////////////////////////////////////////////
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 73395caed..7d4153118 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -24,7 +24,9 @@ module round(
     input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
     input logic                 RoundSgn,      // the result's sign
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic [`NE:0]           CorrDivExp,    // the calculated expoent
+    input logic [`NE+1:0]           CorrDivExp,    // the calculated expoent
+    input logic                DivStickyM,             // sticky bit
+    input logic DivNegStickyM,
     output logic                UfPlus1,  // do you add or subtract on from the result
     output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
     output logic [`NF-1:0]      ResFrac,         // Result fraction
@@ -149,7 +151,7 @@ module round(
 
     // only add the Addend sticky if doing an FMA opperation
     //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
+    assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp | DivStickyM&DivOp;
     
     // determine round and LSB of the rounded value
     //      - underflow round bit is used to determint the underflow flag
@@ -223,9 +225,11 @@ module round(
     assign Sticky = UfSticky | UfRound;
 
 
-    // Deterimine if a small number was supposed to be subtrated - For Fma calculation only
-    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
-    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
+    // Deterimine if a small number was supposed to be subtrated
+    //  - for FMA or if division has a negitive sticky bit
+    assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
+    assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
+
 
     always_comb begin
         // Determine if you add 1
@@ -305,7 +309,7 @@ module round(
         case(PostProcSelM)
             2'b10: RoundExp = SumExp; // fma
             2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
-            2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
+            2'b01: RoundExp = CorrDivExp; // divide
             default: RoundExp = 0; 
         endcase
 
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index 8fd8d5419..eface008d 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -42,23 +42,27 @@ module srtradix4 (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       DivDone,
+  output logic       DivStickyE,
+  output logic       DivNegStickyE,
   output logic [`DIVLEN+2:0] Quot,
   output logic [`XLEN-1:0] Rem, // *** later handle integers
-  output logic [`NE:0] DivCalcExpE
+  output logic [`NE+1:0] DivCalcExpE
 );
 
   // logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [3:0]     q;
-  logic [`NE:0] DivCalcExp;
+  logic [`NE+1:0] DivCalcExp;
   logic [`DIVLEN:0]    X;
   logic [`DIVLEN-1:0]  Dpreproc;
   logic [`DIVLEN+3:0]  WS, WSA, WSN;
   logic [`DIVLEN+3:0]  WC, WCA, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp;
+  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic           intSign;
  
-  srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
+  srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X, 
+                    .XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign);
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -88,7 +92,7 @@ module srtradix4 (
   qsel4 qsel4(.D, .WS, .WC, .q);
 
   // Store the expoenent and sign until division is DivDone
-  flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
+  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
 
   // Divisor Selection logic
   // *** radix 4 change to choose -2 to 2
@@ -113,8 +117,10 @@ module srtradix4 (
   
   //*** change for radix 4
   otfc4 otfc4(.clk, .DivStart, .q, .Quot);
+  assign DivStickyE = (WS+WC) != 0; //replace with early termination
+  assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination
 
-  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
+  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
 
   divcounter divcounter(clk, DivStart, DivDone);
 
@@ -233,11 +239,10 @@ module srtpreproc (
   input  logic       Sqrt, // perform square root, not divide
   output logic [`DIVLEN:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
+  output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
   output logic       intSign // Quotient integer sign
 );
-
-  // logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
   logic  [`DIVLEN:0] PreprocA, PreprocX;
@@ -245,17 +250,21 @@ module srtpreproc (
 
   // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
   // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
-
   // lzc #(`XLEN) lzcA (PosA, zeroCntA);
   // lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
+  // ***can probably merge X LZC with conversion
+  // cout the number of leading zeros
+  lzc #(`NF+1) lzcA (XManE, XZeroCnt);
+  lzc #(`NF+1) lzcB (YManE, YZeroCnt);
+
   // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
   // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {XManE<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
 
   
   assign X = Int ? PreprocA : PreprocX;
@@ -358,9 +367,11 @@ endmodule
 module expcalc(
   input logic  [`NE-1:0] XExpE, YExpE,
   input logic XZeroE,
-  output logic [`NE:0] DivCalcExp
+  input logic  [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  output logic [`NE+1:0] DivCalcExp
 );
 
-  assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
+  // correct exponent for denormal shifts
+  assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
 
 endmodule
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index e8afb299b..50a651e28 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -69,8 +69,9 @@ module testbenchfp;
   logic 			          NegSumE;
   logic 			          ZSgnEffE;
   logic 			          PSgnE;
-  logic       DivSgn;
-  logic [`NE:0] DivCalcExp;
+  logic       DivSticky;
+  logic       DivNegSticky;
+  logic [`NE+1:0] DivCalcExp;
 
 
   ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -644,8 +645,8 @@ module testbenchfp;
               
   postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
               .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
-              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
-              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
+              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky),
+              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .DivNegStickyM(DivNegSticky),
               .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
               .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
               .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
@@ -659,9 +660,9 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .DivStickyE(DivSticky),
                 .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
-                .DivDone, .Quot, .Rem());
+                .DivNegStickyE(DivNegSticky), .DivDone, .Quot, .Rem());
                 
   assign CmpFlg[3:0] = 0;
 

From a65c0eb6798dbdb4fef850994410f5d4ac4a1ebb Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 24 Jun 2022 21:02:50 +0000
Subject: [PATCH 02/23] radix 4 division denormal result handling

---
 pipelined/src/fpu/divshiftcalc.sv | 14 +++++++++++---
 pipelined/src/fpu/postprocess.sv  |  5 +++--
 pipelined/srt/srt-radix4.sv       |  2 +-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 398e8f467..d1a364b3c 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -4,12 +4,20 @@ module divshiftcalc(
     input logic  [`DIVLEN+2:0] Quot,
     input logic  [`NE+1:0] DivCalcExpM,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
+    output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
     output logic [`NE+1:0] CorrDivExp
 );
-    
-    assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
+    logic ResDenorm;
+    logic [`NE+1:0] DenormShift;
+    logic [`NE+1:0] NormShift;
+    assign ResDenorm = DivCalcExpM[`NE+1];
+    assign DenormShift = (`NE+2)'(`NF-1)+DivCalcExpM;
+    assign NormShift = {(`NE+1)'(0), ~Quot[`DIVLEN+2]} + (`NE+2)'(`NF);
+    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0] : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
+
+    assign DivShiftIn = {(`NF)'(0), Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2-`NF{1'b0}}};
     // the quotent is in the range [.5,2)
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
+    assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
 
 endmodule
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 9516e223f..b4c8496a3 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -88,6 +88,7 @@ module postprocess(
     logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
     logic [$clog2(`NORMSHIFTSZ)-1:0]  DivShiftAmt;
     logic [`NORMSHIFTSZ-1:0]            ShiftIn;        // is the sum zero
+    logic [`NORMSHIFTSZ-1:0] DivShiftIn;
     logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
     logic                   Plus1;      // add one to the final result?
     logic                   IntInvalid, Overflow, Underflow, Invalid; // flags
@@ -142,7 +143,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
+    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSelM)
@@ -156,7 +157,7 @@ module postprocess(
             end
             2'b01: begin //div ***prob can take out
                 ShiftAmt = DivShiftAmt;
-                ShiftIn =  {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
+                ShiftIn =  DivShiftIn;
             end
             default: begin 
                 ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index eface008d..a49838ace 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -372,6 +372,6 @@ module expcalc(
 );
 
   // correct exponent for denormal shifts
-  assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
+  assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
 
 endmodule

From c1b4e7fd2c765d098a3765582be2fb47e89307c5 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 24 Jun 2022 21:23:15 +0000
Subject: [PATCH 03/23] modified result select to account for x/inf

---
 pipelined/regression/sim-testfloat-batch |  2 +-
 pipelined/src/fpu/postprocess.sv         |  1 +
 pipelined/src/fpu/resultselect.sv        | 34 +++++++++++++-----------
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/pipelined/regression/sim-testfloat-batch b/pipelined/regression/sim-testfloat-batch
index 002b3423f..f1178f1d2 100755
--- a/pipelined/regression/sim-testfloat-batch
+++ b/pipelined/regression/sim-testfloat-batch
@@ -7,4 +7,4 @@
 # sqrt   - test square root
 # all    - test everything
 
-vsim -c -do "do testfloat.do rv64fpquad all"
\ No newline at end of file
+vsim -c -do "do testfloat.do rv64fpquad $1"
\ No newline at end of file
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index b4c8496a3..6d80f661d 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -210,6 +210,7 @@ module postprocess(
     resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
         .IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf, 
         .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
+        .XInfM, .YInfM, .DivOp,
         .DivByZero, .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
 
 endmodule
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index 9be046a3a..7efcc3872 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -7,6 +7,9 @@ module resultselect(
     input logic     [2:0]           FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic     [`FMTBITS-1:0]  OutFmt,       // output format
     input logic                     InfIn,
+    input logic                     XInfM,
+    input logic                     YInfM,
+    input logic                     DivOp,
     input logic                     XZeroM,
     input logic                     IntZeroM,
     input logic                     NaNIn,
@@ -40,6 +43,7 @@ module resultselect(
     logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
     logic [`XLEN+1:0]       NegRes;     // the negation of the result
     logic KillRes;
+    logic SelOfRes;
 
 
     // does the overflow result output the maximum normalized floating point number
@@ -59,7 +63,7 @@ module resultselect(
         end
 
         assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        // assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
         assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
         assign NormRes = {ResSgn, ResExp, ResFrac};
 
@@ -75,7 +79,7 @@ module resultselect(
         
         assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        // assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
         assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
         assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
 
@@ -93,7 +97,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    // KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -107,7 +111,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    // KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
                 end
@@ -122,7 +126,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    // KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
                 end
@@ -156,7 +160,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    // KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -170,7 +174,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    // KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
                 end
@@ -185,7 +189,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    // KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
                 end
@@ -201,7 +205,7 @@ module resultselect(
                     
                     OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
 
-                    KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    // KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
                 end
@@ -217,22 +221,22 @@ module resultselect(
     //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
     //      - dont set to zero if fp input is zero but not using the fp input
     //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
-
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1);
+    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
     // output infinity with result sign if divide by zero
     if(`IEEE754) begin
         assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
                          YNaNM&~CvtOp ? YNaNRes :
                          ZNaNM&FmaOp ? ZNaNRes :
                          Invalid ? InvalidRes : 
-                         Overflow|DivByZero|InfIn ? OfRes :
-                         KillProdM&FmaOp ? KillProdRes : 
+                         SelOfRes ? OfRes :
+                        //  KillProdM&FmaOp ? KillProdRes : 
                          KillRes ? UfRes :  
                          NormRes;
     end else begin
         assign PostProcResM = NaNIn|Invalid ? InvalidRes :
-                         Overflow|DivByZero|InfIn ? OfRes :
-                         KillProdM&FmaOp ? KillProdRes :  
+                         SelOfRes ? OfRes :
+                        //  KillProdM&FmaOp ? KillProdRes :  
                          KillRes ? UfRes :  
                          NormRes;
     end

From 913a381442834d4067894b56de0f5fdf03d1f814 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Sat, 25 Jun 2022 00:04:53 +0000
Subject: [PATCH 04/23] commented out error - also some divider bugs fixed

---
 pipelined/src/fpu/divshiftcalc.sv | 23 +++++++++++++++++++----
 pipelined/src/fpu/flags.sv        |  6 +++---
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index d1a364b3c..3a5766643 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -10,12 +10,27 @@ module divshiftcalc(
     logic ResDenorm;
     logic [`NE+1:0] DenormShift;
     logic [`NE+1:0] NormShift;
-    assign ResDenorm = DivCalcExpM[`NE+1];
-    assign DenormShift = (`NE+2)'(`NF-1)+DivCalcExpM;
-    assign NormShift = {(`NE+1)'(0), ~Quot[`DIVLEN+2]} + (`NE+2)'(`NF);
+
+    // is the result denormalized
+    // if the exponent is 1 and the quotient still needs a normalization shift, then the result is denormalized
+    assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
+    // if the result is denormalized
+    //  00000000x.xxxxxx...                     Exp = DivCalcExp
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
+    //  .000xxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = 0
+    //  .0000xxxxxxxxxxx... >> 1                Exp = 1
+    // Left shift amount  = DivCalcExp+NF+1-1
+    assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM;
+    // if the result is normalized
+    //  00000000x.xxxxxx...                     Exp = DivCalcExp
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
+    //  00000000x.xxxxxx... << NF+1             Exp = DivCalcExp
+    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
+    // Left shift amount  = NF+1 plus 1 if normalization required
+    assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
     assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0] : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
 
-    assign DivShiftIn = {(`NF)'(0), Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2-`NF{1'b0}}};
+    // assign DivShiftIn = {(`NF)'(0), Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
     // the quotent is in the range [.5,2)
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
     assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 122df8b21..c91e30a50 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -88,7 +88,7 @@ module flags(
     //                 |           and the exponent isn't negitive
     //                 |           |                   if the input isnt infinity or NaN
     //                 |           |                   |            
-    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
+    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
 
     // detecting tininess after rounding
     //                  the exponent is negitive
@@ -98,11 +98,11 @@ module flags(
     //                  |                    |                    |                                      |                     and if the result is not exact
     //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
     //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
+    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn|DivByZero);
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
-    assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
+    assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn|DivByZero);
 
     //                  if the res is too small to be represented and not 0
     //                  |                                     and if the res is not invalid (outside the integer bounds)

From d16ae7c305f74d77547c6d28eaadda32216cda28 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Sat, 25 Jun 2022 00:31:32 +0000
Subject: [PATCH 05/23] passing regression again

---
 pipelined/src/fpu/resultselect.sv | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index 7efcc3872..bcd66ca8e 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -63,7 +63,7 @@ module resultselect(
         end
 
         assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        // assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
         assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
         assign NormRes = {ResSgn, ResExp, ResFrac};
 
@@ -79,7 +79,7 @@ module resultselect(
         
         assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        // assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
         assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
         assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
 
@@ -97,7 +97,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    // KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -111,7 +111,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    // KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
                 end
@@ -126,7 +126,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    // KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
                 end
@@ -160,7 +160,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    // KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -174,7 +174,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    // KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
                 end
@@ -189,7 +189,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    // KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
                 end
@@ -205,7 +205,7 @@ module resultselect(
                     
                     OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
 
-                    // KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
                 end
@@ -230,13 +230,13 @@ module resultselect(
                          ZNaNM&FmaOp ? ZNaNRes :
                          Invalid ? InvalidRes : 
                          SelOfRes ? OfRes :
-                        //  KillProdM&FmaOp ? KillProdRes : 
+                         KillProdM&FmaOp ? KillProdRes : 
                          KillRes ? UfRes :  
                          NormRes;
     end else begin
         assign PostProcResM = NaNIn|Invalid ? InvalidRes :
                          SelOfRes ? OfRes :
-                        //  KillProdM&FmaOp ? KillProdRes :  
+                         KillProdM&FmaOp ? KillProdRes :  
                          KillRes ? UfRes :  
                          NormRes;
     end

From 06f7f9b147e4947c8b4458a1ae0ffacdd5fff5eb Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Sat, 25 Jun 2022 01:42:23 +0000
Subject: [PATCH 06/23] fixed commented out error and removed killprod from
 result selection

---
 pipelined/config/shared/wally-shared.vh |  6 +++---
 pipelined/src/fpu/divshiftcalc.sv       |  3 ++-
 pipelined/src/fpu/fmashiftcalc.sv       |  5 +++--
 pipelined/src/fpu/resultselect.sv       | 14 +-------------
 pipelined/testbench/testbench-fp.sv     |  2 +-
 5 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 3c2699da0..51c45ef00 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -95,11 +95,11 @@
 
 // largest length in IEU/FPU
 `define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
 `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
-`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
-`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
+`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
+`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
 
 // Disable spurious Verilator warnings
 
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 3a5766643..e53d7cdde 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -30,7 +30,8 @@ module divshiftcalc(
     assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
     assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0] : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
 
-    // assign DivShiftIn = {(`NF)'(0), Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
+    // *** may be able to reduce shifter size
+    assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
     // the quotent is in the range [.5,2)
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
     assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 17a13dda5..2a2417281 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -120,8 +120,9 @@ module fmashiftcalc(
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = PreResultDenorm&~KillProdM ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
+    //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, SumM};
-    assign FmaShiftAmt = FmaNormCntM+DenormShift;
+    assign FmaShiftAmt = (FmaNormCntM&{$clog2(3*`NF+7){~KillProdM}})+DenormShift;
 endmodule
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index bcd66ca8e..f14b6fc12 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -38,7 +38,7 @@ module resultselect(
     output logic [1:0] NegResMSBS,
     output logic    [`XLEN-1:0]     FCvtIntResM     // final res
 );
-    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
+    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
     logic OfResMax;
     logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
     logic [`XLEN+1:0]       NegRes;     // the negation of the result
@@ -63,7 +63,6 @@ module resultselect(
         end
 
         assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
         assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
         assign NormRes = {ResSgn, ResExp, ResFrac};
 
@@ -79,7 +78,6 @@ module resultselect(
         
         assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
         assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
         assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
 
@@ -97,7 +95,6 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -111,7 +108,6 @@ module resultselect(
                         InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
                 end
@@ -126,7 +122,6 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
                 end
@@ -140,7 +135,6 @@ module resultselect(
                         InvalidRes = (`FLEN)'(0);
                     end
                     OfRes = (`FLEN)'(0);
-                    KillProdRes = (`FLEN)'(0);
                     UfRes = (`FLEN)'(0);
                     NormRes = (`FLEN)'(0);
                 end
@@ -160,7 +154,6 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
@@ -174,7 +167,6 @@ module resultselect(
                         InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
                 end
@@ -189,7 +181,6 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
                 end
@@ -205,7 +196,6 @@ module resultselect(
                     
                     OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
 
-                    KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                     UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
                     NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
                 end
@@ -230,13 +220,11 @@ module resultselect(
                          ZNaNM&FmaOp ? ZNaNRes :
                          Invalid ? InvalidRes : 
                          SelOfRes ? OfRes :
-                         KillProdM&FmaOp ? KillProdRes : 
                          KillRes ? UfRes :  
                          NormRes;
     end else begin
         assign PostProcResM = NaNIn|Invalid ? InvalidRes :
                          SelOfRes ? OfRes :
-                         KillProdM&FmaOp ? KillProdRes :  
                          KillRes ? UfRes :  
                          NormRes;
     end
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 50a651e28..c80ffceb2 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -816,7 +816,7 @@ end
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
     // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);

From 2d5d1f4e8f8e9f9881a69f07624b31dfe991d557 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 27 Jun 2022 17:04:51 +0000
Subject: [PATCH 07/23] radix-4 divider passing all double precision testfloat
 tests

---
 pipelined/src/fpu/divshiftcalc.sv |  3 ++-
 pipelined/src/fpu/flags.sv        |  3 ++-
 pipelined/src/fpu/resultselect.sv | 22 +++++++++++-----------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index e53d7cdde..bee4d09c2 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -28,7 +28,8 @@ module divshiftcalc(
     //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
     // Left shift amount  = NF+1 plus 1 if normalization required
     assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
-    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0] : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
+    // if the shift amount is negative then don't shift (keep sticky bit)
+    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
 
     // *** may be able to reduce shifter size
     assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index c91e30a50..ff6495dd9 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -134,7 +134,8 @@ module flags(
     assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
 
     // if dividing by zero and not 0/0
-    assign DivByZero = YZeroM&DivOp&~XZeroM;  
+    //  - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
+    assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn);  
 
     // Combine flags
     //      - to integer results do not set the underflow or overflow flags
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index f14b6fc12..a0bf86d85 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -48,7 +48,7 @@ module resultselect(
 
     // does the overflow result output the maximum normalized floating point number
     //                output infinity if the input is infinity
-    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
+    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
 
     if (`FPSIZES == 1) begin
 
@@ -63,7 +63,7 @@ module resultselect(
         end
 
         assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+        assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)};
         assign NormRes = {ResSgn, ResExp, ResFrac};
 
     end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
@@ -78,7 +78,7 @@ module resultselect(
         
         assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
         assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
 
     end else if (`FPSIZES == 3) begin
@@ -95,7 +95,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
                 `FMT1: begin  
@@ -108,7 +108,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
                 end
                 `FMT2: begin  
@@ -122,7 +122,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
                 end
                 default: begin
@@ -154,7 +154,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
+                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {ResSgn, ResExp, ResFrac};
                 end
                 2'h1: begin  
@@ -167,7 +167,7 @@ module resultselect(
                         InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                     end
                     OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
+                    UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
                 end
                 2'h0: begin  
@@ -181,7 +181,7 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
+                    UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
                 end
                 2'h2: begin  
@@ -195,8 +195,8 @@ module resultselect(
                     end
                     
                     OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
-
-                    UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
+	            // zero is exact if dividing by infinity so don't add 1
+                    UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
                     NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
                 end
             endcase

From f25bb4a3846687e14bec0fcd6b12317aa9fed6a6 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 27 Jun 2022 20:43:55 +0000
Subject: [PATCH 08/23] radix-4 early termination working for special cases -
 not working completely

---
 pipelined/regression/wave-fpu.do    |  2 +-
 pipelined/src/fpu/divshiftcalc.sv   | 63 +++++++++++++++++++++--
 pipelined/src/fpu/fpu.sv            |  3 +-
 pipelined/src/fpu/postprocess.sv    |  3 +-
 pipelined/src/fpu/resultselect.sv   |  2 +-
 pipelined/srt/srt-radix4.sv         | 77 +++++++++++++++--------------
 pipelined/testbench/testbench-fp.sv | 16 +++---
 7 files changed, 114 insertions(+), 52 deletions(-)

diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index a58400cca..7dfec7e24 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index bee4d09c2..d867efc44 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -3,6 +3,8 @@
 module divshiftcalc(
     input logic  [`DIVLEN+2:0] Quot,
     input logic  [`NE+1:0] DivCalcExpM,
+    input logic  [`FMTBITS-1:0] FmtM,
+    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
     output logic [`NE+1:0] CorrDivExp
@@ -10,30 +12,81 @@ module divshiftcalc(
     logic ResDenorm;
     logic [`NE+1:0] DenormShift;
     logic [`NE+1:0] NormShift;
+    logic [`NE+1:0] Nf, NfPlus1;
 
     // is the result denromalized
     // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
     assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
+    // select the proper fraction lengnth
+    if (`FPSIZES == 1) begin
+        assign Nf = (`NE+2)'(`NF);
+        assign NfPlus1 = (`NE+2)'(`NF+1);
+
+    end else if (`FPSIZES == 2) begin
+        assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
+        assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (FmtM)
+                `FMT: begin
+                    Nf = (`NE+2)'(`NF);
+                    NfPlus1 = (`NE+2)'(`NF+1);
+                end
+                `FMT1: begin
+                    Nf = (`NE+2)'(`NF1);
+                    NfPlus1 = (`NE+2)'(`NF1+1);
+                end
+                `FMT2: begin
+                    Nf = (`NE+2)'(`NF2);
+                    NfPlus1 = (`NE+2)'(`NF2+1);
+                end
+                default: begin
+                    Nf = 1'bx;
+                    NfPlus1 = 1'bx;
+                end
+            endcase
+    end else if (`FPSIZES == 4) begin
+        always_comb
+            case (FmtM)
+                2'h3: begin
+                    Nf = (`NE+2)'(`Q_NF);
+                    NfPlus1 = (`NE+2)'(`Q_NF+1);
+                end
+                2'h1: begin
+                    Nf = (`NE+2)'(`D_NF);
+                    NfPlus1 = (`NE+2)'(`D_NF+1);
+                end
+                2'h0: begin
+                    Nf = (`NE+2)'(`S_NF);
+                    NfPlus1 = (`NE+2)'(`S_NF+1);
+                end
+                2'h2: begin
+                    Nf = (`NE+2)'(`H_NF);
+                    NfPlus1 = (`NE+2)'(`H_NF+1);
+                end
+            endcase
+    end
     // if the result is denormalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
     //  .000xxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = 0
     //  .0000xxxxxxxxxxx... >> 1                Exp = 1
     // Left shift amount  = DivCalcExp+NF+1-1
-    assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM;
-    // if the result is denormalized
+    assign DenormShift = Nf+DivCalcExpM;
+    // if the result is normalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
     //  00000000x.xxxxxx... << NF+1             Exp = DivCalcExp
     //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
     // Left shift amount  = NF+1 plus 1 if normalization required
-    assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
+    assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
+    assign DivShiftAmt = (ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
 
     // *** may be able to reduce shifter size
     assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
-    // the quotent is in the range [.5,2)
+    // the quotent is in the range [.5,2) if there is no early termination
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
     assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
 
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 2f43b27d4..aba1a8f48 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -127,6 +127,7 @@ module fpu (
    logic [`NE+1:0] DivCalcExpM;
    logic DivNegStickyM;
    logic DivStickyM;
+   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -357,7 +358,7 @@ module fpu (
 
    assign FpLoadM = FResSelM[1];
 
-   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
                            .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
                            .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
                            .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 6d80f661d..217e3f586 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -49,6 +49,7 @@ module postprocess(
     input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
     input logic                             PSgnM,      // the product's sign
     input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
+    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
     input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
     input logic [`NE+1:0]           DivCalcExpM,    // the calculated expoent
@@ -143,7 +144,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSelM)
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index a0bf86d85..d6d15e46f 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -211,7 +211,7 @@ module resultselect(
     //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
     //      - dont set to zero if fp input is zero but not using the fp input
     //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1);
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
     assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
     // output infinity with result sign if divide by zero
     if(`IEEE754) begin
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index a49838ace..179fbf45a 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -36,11 +36,14 @@ module srtradix4 (
   input  logic [`NE-1:0] XExpE, YExpE,
   input  logic [`NF:0] XManE, YManE,
   input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic XZeroE,
+  input  logic XInfE, YInfE, 
+  input  logic XZeroE, YZeroE, 
+  input  logic XNaNE, YNaNE, 
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
   output logic       DivDone,
   output logic       DivStickyE,
   output logic       DivNegStickyE,
@@ -49,10 +52,9 @@ module srtradix4 (
   output logic [`NE+1:0] DivCalcExpE
 );
 
-  // logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [3:0]     q;
   logic [`NE+1:0] DivCalcExp;
-  logic [`DIVLEN:0]    X;
+  logic [`DIVLEN-1:0]    X;
   logic [`DIVLEN-1:0]  Dpreproc;
   logic [`DIVLEN+3:0]  WS, WSA, WSN;
   logic [`DIVLEN+3:0]  WC, WCA, WCN;
@@ -68,13 +70,11 @@ module srtradix4 (
   // When start is asserted, the inputs are loaded into the divider.
   // Otherwise, the divisor is retained and the partial remainder
   // is fed back for the next iteration.
-  //  - assumed one is added here since all numbers are normlaized
-  //    *** wait what about zero? is that specal case? can the divider handle it?
   //  - when the start signal is asserted X and 0 are loaded into WS and WC
   //  - otherwise load WSA into the flipflop
-  //  *** what does N and A stand for?
-  //  *** change shift amount for radix4
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
+  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
+  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
   mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
@@ -117,12 +117,11 @@ module srtradix4 (
   
   //*** change for radix 4
   otfc4 otfc4(.clk, .DivStart, .q, .Quot);
-  assign DivStickyE = (WS+WC) != 0; //replace with early termination
-  assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination
 
   expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
 
-  divcounter divcounter(clk, DivStart, DivDone);
+  earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
+                  .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
 
 endmodule
 
@@ -130,28 +129,35 @@ endmodule
 // Submodules //
 ////////////////
 
-/////////////
-// counter //
-/////////////
-module divcounter(input  logic clk, 
-               input  logic DivStart, 
-               output logic DivDone);
+module earlytermination(
+  input  logic clk, 
+	input logic [`DIVLEN+3:0] WS, WC,
+  input  logic XInfE, YInfE, 
+  input  logic XZeroE, YZeroE, 
+  input  logic XNaNE, YNaNE, 
+  input  logic DivStart, 
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
+  output logic DivStickyE,
+  output logic DivNegStickyE,
+  output logic DivDone);
  
-   logic    [5:0]  count;
-
-  // This block of control logic sequences the divider
-  // through its iterations.  You may modify it if you
-  // build a divider which completes in fewer iterations.
-  // You are not responsible for the (trivial) circuit
-  // design of the block.
+   logic [$clog2(`DIVLEN/2+3)-1:0]  Count;
+   logic WZero;
 
+   assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary
+   // *** rather than Counting should just be able to check if one of the two msbs of the quotent is 1 then stop???
+  assign DivDone = (DivStickyE | WZero);
+  assign DivStickyE = ~|Count;
+  assign DivNegStickyE = $signed(WS+WC) < 0;
+  assign EarlyTermShiftDiv2E = Count;
+  // +1 for setup
+  // `DIVLEN/2 to get required number of bits
+  // +1 for possible .5 and round bit
+  // Count down Counter
   always @(posedge clk)
     begin
-      DivDone = 0;
-      if      (count == `DIVLEN/2+1) DivDone <= #1 1;
-      else if (DivDone | DivStart) DivDone <= #1 0;	
-      if (DivStart) count <= #1 0;
-      else     count <= #1 count+1;
+      if (DivStart) Count <= #1 `DIVLEN/2+2;
+      else     Count <= #1 Count-1;
     end
 endmodule
 
@@ -237,7 +243,7 @@ module srtpreproc (
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN:0] X,
+  output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
@@ -245,7 +251,7 @@ module srtpreproc (
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`DIVLEN:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
   logic  [`DIVLEN-1:0] PreprocB, PreprocY;
 
   // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
@@ -263,7 +269,7 @@ module srtpreproc (
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
   assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
 
   
@@ -300,7 +306,7 @@ module otfc4 (
   // if starting a new divison set Q to 0 and QM to -1
   mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
   mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   // shift Q (quotent) and QM (quotent-1)
@@ -331,8 +337,7 @@ module otfc4 (
       QMNext = {QMR, 2'b11};
     end 
   end
-  // Quot is in the range [.5, 2) so normalize the result if nesissary
-  // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
+  // Final quotient is in the range [.5, 2)
 
 endmodule
 
@@ -371,7 +376,7 @@ module expcalc(
   output logic [`NE+1:0] DivCalcExp
 );
 
-  // correct exponent for denormal shifts
+  // correct exponent for denormalized input's normalization shifts
   assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
 
 endmodule
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index c80ffceb2..bbe045972 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -55,6 +55,7 @@ module testbenchfp;
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
 	logic [`DIVLEN+2:0] Quot;
   logic CvtResDenormUfE;
+  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
   logic DivStart, DivDone;
   
 
@@ -651,7 +652,7 @@ module testbenchfp;
               .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
               .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
               .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
-              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
               .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
   
   fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
@@ -660,9 +661,9 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .DivStickyE(DivSticky),
-                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
-                .DivNegStickyE(DivNegSticky), .DivDone, .Quot, .Rem());
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
+                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
                 
   assign CmpFlg[3:0] = 0;
 
@@ -815,8 +816,9 @@ end
 
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
-    // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    // check if result is correct
+    //  - wait till the division result is done or one extra cycle for early termination (to simulate the EM pipeline stage)
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@@ -839,7 +841,7 @@ end
       $stop;
     end
 
-    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
+    if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
 
     if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
 

From 54938c7abff7e1d99f3f145df6c71b5476f09655 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Mon, 27 Jun 2022 21:44:06 +0000
Subject: [PATCH 09/23] Added int tests

---
 pipelined/srt/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile
index 49b21be7a..f2ed6f8ca 100644
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@@ -23,5 +23,10 @@ qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
 	gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
 	./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
 
+inttestgen: inttestgen.c
+	gcc -lm -o inttestgen inttestgen.c
+	./inttestgen
+
 clean:
 	rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
+	

From 3a40c68549a38c091bedde6472655bf78edb19c0 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Mon, 27 Jun 2022 23:55:21 +0000
Subject: [PATCH 10/23] Updated radix 2 divider to work with integers and
 floats in new structure. Integers still might not work.

---
 addins/riscv-arch-test                  |  2 +-
 pipelined/config/shared/wally-shared.vh |  6 +++---
 pipelined/srt/exptestgen.c              |  2 +-
 pipelined/srt/srt.sv                    | 19 +++++++++----------
 pipelined/srt/testbench.sv              | 12 ++++++------
 5 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index be67c99bd..307c77b26 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
+Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 51c45ef00..5db8af1cf 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -94,9 +94,9 @@
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
 
 // largest length in IEU/FPU
-`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
-`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
-`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
+`define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
+`define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
 `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
 `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c
index bd51126e7..61fe74aa4 100644
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@@ -46,7 +46,7 @@ void main(void)
   int i, j;
   int bias = 1023;
 
-  if ((fptr = fopen("testvectors","w")) == NULL) {
+  if ((fptr = fopen("testvectors","w")) == NULL) { 
     fprintf(stderr, "Couldn't write testvectors file\n");
     exit(1);
   }
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index e40f27589..5adeced47 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -2,7 +2,7 @@
 // srt.sv
 //
 // Written: David_Harris@hmc.edu 13 January 2022
-// Modified: 
+// Modified: cturek@hmc.edu June 2022
 //
 // Purpose: Combined Divide and Square Root Floating Point and Integer Unit
 // 
@@ -29,10 +29,8 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 `include "wally-config.vh"
-
-`define DIVLEN ((`NF<(`XLEN+1)) ? (`XLEN + 1) : `NF)
-`define EXTRAFRACBITS ((`NF<(`XLEN+1)) ? (`XLEN - `NF + 1) : 0)
-`define EXTRAINTBITS ((`NF<(`XLEN+1)) ? 0 : (`NF - `XLEN))
+`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
 
 module srt (
   input  logic clk,
@@ -131,11 +129,11 @@ module srtpreproc (
   lzc #(`XLEN) lzcA (PosA, zeroCntA);
   lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
-  assign ExtraA = {1'b0, PosA, {`EXTRAINTBITS{1'b0}}};
-  assign ExtraB = {1'b0, PosB, {`EXTRAINTBITS{1'b0}}};
+  assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
+  assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
 
   assign PreprocA = ExtraA << zeroCntA;
-  assign PreprocB = ExtraB << (zeroCntB + 1);
+  assign PreprocB = ExtraB << zeroCntB;
   assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
   assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
 
@@ -228,14 +226,15 @@ module otfc2 #(parameter N=65) (
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [N+2:0] Q, QM, QNext, QMNext;
+  logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
   logic [N+1:0] QR, QMR;
 
   flopr #(N+3) Qreg(clk, Start, QNext, Q);
-  flopr #(N+3) QMreg(clk, Start, QMNext, QM);
+  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
+  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   always_comb begin
     QR  = Q[N+1:0];
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 93da74752..9655d7f70 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -1,4 +1,4 @@
-`define DIVLEN 65
+`define DIVLEN 64
 
 /////////////
 // counter //
@@ -17,7 +17,7 @@ module counter(input  logic clk,
 
   always @(posedge clk)
     begin
-      if      (count == `DIVLEN+1) done <= #1 1;
+      if      (count == `DIVLEN + 2) done <= #1 1;
       else if (done | req) done <= #1 0;	
       if (req) count <= #1 0;
       else     count <= #1 count+1;
@@ -101,8 +101,8 @@ module testbench;
       b = Vec[`memb];
       {bsign, bExp, bfrac} = b;
       nextr = Vec[`memr];
-      r = Quot[`DIVLEN:`DIVLEN - 52];
-      rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
+      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
+      rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
       req <= #5 1;
     end
   
@@ -110,8 +110,8 @@ module testbench;
 
   always @(posedge clk)
     begin
-      r = Quot[`DIVLEN:`DIVLEN - 52];
-      rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
+      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
+      rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)];
       if (done) 
 	begin
 	  req <= #5 1;

From f2d05911ca3b151e01db9489df557bc8eb339a16 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 28 Jun 2022 00:16:22 +0000
Subject: [PATCH 11/23] very basic early termination passes testfloat 64-bit
 tests

---
 pipelined/src/fpu/divshiftcalc.sv  | 25 +++++++++++--------------
 pipelined/src/fpu/lzacorrection.sv | 14 +++++++++++++-
 pipelined/src/fpu/postprocess.sv   |  7 +++++--
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index d867efc44..51698590e 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -7,16 +7,15 @@ module divshiftcalc(
     input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
-    output logic [`NE+1:0] CorrDivExp
+    output logic DivResDenorm,
+    output logic [`NE+1:0] DivDenormShift
 );
-    logic ResDenorm;
-    logic [`NE+1:0] DenormShift;
     logic [`NE+1:0] NormShift;
     logic [`NE+1:0] Nf, NfPlus1;
 
     // is the result denromalized
     // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
-    assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
+    assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
     // select the proper fraction lengnth
     if (`FPSIZES == 1) begin
         assign Nf = (`NE+2)'(`NF);
@@ -70,24 +69,22 @@ module divshiftcalc(
     // if the result is denormalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  .000xxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = 0
+    //  .00xxxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = +1
     //  .0000xxxxxxxxxxx... >> 1                Exp = 1
     // Left shift amount  = DivCalcExp+NF+1-1
-    assign DenormShift = Nf+DivCalcExpM;
+    assign DivDenormShift = Nf+DivCalcExpM;
     // if the result is normalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  00000000x.xxxxxx... << NF+1             Exp = DivCalcExp
-    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
-    // Left shift amount  = NF+1 plus 1 if normalization required
-    assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
+    //  00000000.xxxxxxx... << NF               Exp = DivCalcExp+1
+    //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
+    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
+    // inital Left shift amount  = NF
+    assign NormShift = Nf;
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = (ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
+    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
 
     // *** may be able to reduce shifter size
     assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
-    // the quotent is in the range [.5,2) if there is no early termination
-    // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
 
 endmodule
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv
index f06dd84a9..e5a2d5c34 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@@ -3,14 +3,20 @@
 module lzacorrection(
     input logic  [`NORMSHIFTSZ-1:0]     Shifted,         // the shifted sum before LZA correction
     input logic                         FmaOp,
+    input logic                         DivOp,
+    input logic DivResDenorm,
+    input logic  [`NE+1:0] DivCalcExpM,
+    input logic [`NE+1:0] DivDenormShift,
     input logic  [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                         PreResultDenorm,    // is the result denormalized - calculated before LZA corection
     input logic                         KillProdM,  // is the product set to zero
     input logic                         SumZero,
     output logic  [`CORRSHIFTSZ-1:0]    CorrShifted,         // the shifted sum before LZA correction
+    output logic [`NE+1:0] CorrDivExp,
     output logic [`NE+1:0]              SumExp         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
+    logic [`CORRSHIFTSZ:0]           CorrQuotShifted;
     logic                        ResDenorm;    // is the result denormalized
     logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
@@ -19,11 +25,17 @@ module lzacorrection(
     assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
-    assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    //                        if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Denorm)
+    assign CorrQuotShifted =  {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
+    // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
+    assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
     assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
+    // the quotient is in the range [.5,2) if there is no early termination
+    // if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
+    assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2};
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 217e3f586..ab06a9406 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -112,6 +112,8 @@ module postprocess(
     logic UfLSBRes;
     logic Sqrt;
     logic [`FMTBITS-1:0] OutFmt;
+    logic DivResDenorm;
+    logic [`NE+1:0] DivDenormShift;
 
     // signals to help readability
     assign Signed = FOpCtrlM[0];
@@ -144,7 +146,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSelM)
@@ -169,7 +171,8 @@ module postprocess(
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
     lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
-                                .SumZero, .Shifted, .SumExp, .CorrShifted);
+                                .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExpM,
+                                .CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding

From ddf757078b2f1a63acd7d3f5d93d25d847c0d911 Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Mon, 27 Jun 2022 18:56:35 -0700
Subject: [PATCH 12/23] Added reset read testcodes to GPIO

---
 .../references/WALLY-gpio-01.reference_output  | 13 +++++++++++++
 .../rv32i_m/privilege/src/WALLY-gpio-01.S      | 18 +++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
index 3cbf56ae5..3f6dcc8e1 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
@@ -1,5 +1,18 @@
 00000000 # test reset to zero
 00000000
+00000000 # output_en
+00000000 # output_val
+00000000 # rise_ie
+00000000 # rise_ip
+00000000 # fall_ie
+00000000 # fall_ip
+00000000 # high_ie
+00000000 # high_ip
+00000000 # low_ie
+ffffffff # low_ip
+00000000 # iof_en
+00000000 # iof_sel
+00000000 # out_xor
 A5A5A5A5 # test output pins
 5A5AFFFF
 00000000 # test input enables
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
index be40c0e26..4b2496a77 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
@@ -70,9 +70,21 @@ test_cases:
 
 # =========== Verify all registers reset to zero ===========
 
-.4byte input_val, 0x00000000, read32_test  # input_val reset to zero
-.4byte input_en, 0x00000000, read32_test  # input_en reset to zero
-# *** add more
+.4byte input_val, 0x00000000, read32_test   # input_val reset to zero
+.4byte input_en, 0x00000000, read32_test    # input_en reset to zero
+.4byte output_en, 0x00000000, read32_test   # output_en reset to zero
+.4byte output_val, 0x00000000, read32_test  # output_val reset to zero
+.4byte rise_ie, 0x00000000, read32_test     # rise_ie reset to zero
+.4byte rise_ip, 0x00000000, read32_test     # rise_ip reset to zero
+.4byte fall_ie, 0x00000000, read32_test     # fall_ie reset to zero
+.4byte fall_ip, 0x00000000, read32_test     # fall_ip reads zero after reset (8th signature word in the reference output)
+.4byte high_ie, 0x00000000, read32_test     # high_ie reset to zero
+.4byte high_ip, 0x00000000, read32_test     # high_ip reset to zero
+.4byte low_ie, 0x00000000, read32_test      # low_ie reset to zero
+.4byte low_ip, 0xffffffff, read32_test      # low_ip reads all ones after reset (12th signature word; input_val is zero)
+.4byte iof_en, 0x00000000, read32_test      # iof_en reset to zero
+.4byte iof_sel, 0x00000000, read32_test     # iof_sel reset to zero
+.4byte out_xor, 0x00000000, read32_test     # out_xor reset to zero
 
 # =========== Test output and input pins ===========
 

From ee8349e8324a305740c9ce2a54dddb45522171de Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Mon, 27 Jun 2022 18:59:44 -0700
Subject: [PATCH 13/23] will this work in git

---
 .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h
index c24952b42..fdfc3e6d5 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h
@@ -857,6 +857,27 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
     addi a6, a6, 8 
 .endm
 
+.macro SETUP_PLIC  
+    # Emit test-table entries that set up the PLIC through a series of 32-bit register writes (write32_test)
+
+    .equ PLIC_INTPRI_GPIO, 0x0C00000C       # GPIO is interrupt 3
+    .equ PLIC_INTPRI_UART, 0x0C000028       # UART is interrupt 10
+    .equ PLIC_INTPENDING0, 0x0C001000       # intPending0 register
+    .equ PLIC_INTEN00,     0x0C002000       # interrupt enables for context 0 (machine mode) sources 31:1
+    .equ PLIC_INTEN10,     0x0C002080       # interrupt enables for context 1 (supervisor mode) sources 31:1
+    .equ PLIC_THRESH0,     0x0C200000       # Priority threshold for context 0 (machine mode)
+    .equ PLIC_CLAIM0,      0x0C200004       # Claim/Complete register for context 0
+    .equ PLIC_THRESH1,     0x0C201000       # Priority threshold for context 1 (supervisor mode)
+    .equ PLIC_CLAIM1,      0x0C201004       # Claim/Complete register for context 1
+    # NOTE(review): entries are emitted with .4byte; confirm this matches the field width the rv64 test loop reads
+    .4byte PLIC_THRESH0, 0, write32_test    # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
+    .4byte PLIC_THRESH1, 7, write32_test    # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
+    .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
+    .4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
+    .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
+    .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
+.endm
+
 .macro END_TESTS
     // invokes one final ecall to return to machine mode then terminates this program, so the output is
     //      0x8: termination called from U mode
@@ -984,6 +1005,20 @@ read08_test:
     addi a6, a6, 8
     j test_loop // go to next test case
 
+readmip_test:  // read MIP into the signature as one 8-byte entry
+    csrr t2, mip
+    sd t2, 0(t1)        // store the full XLEN-wide CSR value at the signature pointer
+    addi t1, t1, 8      // advance the signature pointer by one 8-byte entry
+    addi a6, a6, 8      // same stride read08_test applies to a6 in this rv64 library
+    j test_loop // go to next test case
+
+readsip_test:  // read SIP into the signature as one 8-byte entry
+    csrr t2, sip
+    sd t2, 0(t1)        // store the full XLEN-wide CSR value at the signature pointer
+    addi t1, t1, 8      // advance the signature pointer by one 8-byte entry
+    addi a6, a6, 8      // same stride read08_test applies to a6 in this rv64 library
+    j test_loop // go to next test case
+
 goto_s_mode:
     // return to address in t3, 
     li a0, 3 // Trap handler behavior (go to supervisor mode)

From f458deaf00f5a1c21c17a5cff49b61966274a8db Mon Sep 17 00:00:00 2001
From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com>
Date: Tue, 28 Jun 2022 02:23:29 +0000
Subject: [PATCH 14/23] make clean rm extra files

---
 synthDC/Makefile        | 6 ++++--
 synthDC/runAllSynths.sh | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/synthDC/Makefile b/synthDC/Makefile
index 53faa4522..611dcfef9 100755
--- a/synthDC/Makefile
+++ b/synthDC/Makefile
@@ -5,8 +5,8 @@ NAME := synth
 
 # defaults
 export DESIGN ?= wallypipelinedcore
-export FREQ ?= 4000
-export CONFIG ?= rv64gc
+export FREQ ?= 3402
+export CONFIG ?= rv32e
 # sky130 and sky90 presently supported
 export TECH ?= tsmc28
 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient
@@ -126,6 +126,8 @@ clean:
 	rm -f command.log
 	rm -f filenames*.log
 	rm -f power.saif
+	rm -f Synopsys_stack_trace_*.txt
+	rm -f crte_*.txt
 
 
 
diff --git a/synthDC/runAllSynths.sh b/synthDC/runAllSynths.sh
index 1b81a6cd0..6944552d4 100755
--- a/synthDC/runAllSynths.sh
+++ b/synthDC/runAllSynths.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/bash
 
+make clean
 mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p")
 mv newRuns runs
 mkdir newRuns

From aa253748fc08992e1aa984d5d18bb4e36a73a48d Mon Sep 17 00:00:00 2001
From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com>
Date: Tue, 28 Jun 2022 02:28:13 +0000
Subject: [PATCH 15/23] update wally synth analysis

---
 synthDC/extractSummary.py | 49 ++++++++++++++++++++++-----------------
 synthDC/wallySynth.py     | 12 ++++++----
 2 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py
index 4469d4bea..a2f6a9b50 100755
--- a/synthDC/extractSummary.py
+++ b/synthDC/extractSummary.py
@@ -7,6 +7,7 @@ import subprocess
 from matplotlib.cbook import flatten
 import matplotlib.pyplot as plt
 import matplotlib.lines as lines
+from wallySynth import testFreq
 
 
 def synthsintocsv():
@@ -26,7 +27,7 @@ def synthsintocsv():
     writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area'])
 
     for oneSynth in allSynths:
-        descrip = specReg.findall(oneSynth)
+        descrip = specReg.findall(oneSynth) #[30:]
         width = descrip[2][:4]
         config = descrip[2][4:]
         if descrip[3][-2:] == 'nm':
@@ -46,7 +47,7 @@ def synthsintocsv():
                 nums = [float(m) for m in nums]
                 metrics += nums
             except: 
-                print(config + tech + freq + " doesn't have reports")
+                print(width + config + tech + '_' + freq + " doesn't have reports")
         if metrics == []:
             pass
         else:
@@ -56,7 +57,7 @@ def synthsintocsv():
     file.close()
 
 def synthsfromcsv(filename):
-    Synth = namedtuple("Synth", " width config special tech freq delay area")
+    Synth = namedtuple("Synth", "width config special tech freq delay area")
     with open(filename, newline='') as csvfile:
         csvreader = csv.reader(csvfile)
         global allSynths
@@ -110,23 +111,26 @@ def freqPlot(tech, width, config):
     plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png')
     # plt.show()
 
-def areaDelay(width, tech, freq, config=None, special=None):
+def areaDelay(tech, freq, width=None, config=None, special=None):
     delays, areas, labels = ([] for i in range(3))
 
     for oneSynth in allSynths:
-        if (width == oneSynth.width) & (tech == oneSynth.tech) & (freq == oneSynth.freq):
-            if (special != None) & (oneSynth.special == special):
-                delays += [oneSynth.delay]
-                areas += [oneSynth.area]
-                labels += [oneSynth.config]
-            elif (config != None) & (oneSynth.config == config):
-                delays += [oneSynth.delay]
-                areas += [oneSynth.area]
-                labels += [oneSynth.special]
-            else:
-                delays += [oneSynth.delay]
-                areas += [oneSynth.area]
-                labels += [oneSynth.config + '_' + oneSynth.special]
+        if (width==None) or (width == oneSynth.width):
+            if (tech == oneSynth.tech) & (freq == oneSynth.freq):
+                if (special != None) & (oneSynth.special == special):
+                    delays += [oneSynth.delay]
+                    areas += [oneSynth.area]
+                    labels += [oneSynth.width + oneSynth.config]
+                elif (config != None) & (oneSynth.config == config):
+                    delays += [oneSynth.delay]
+                    areas += [oneSynth.area]
+                    labels += [oneSynth.special]
+            # else:
+            #     delays += [oneSynth.delay]
+            #     areas += [oneSynth.area]
+            #     labels += [oneSynth.config + '_' + oneSynth.special]
+    if width == None:
+        width = ''
     
     f, (ax1) = plt.subplots(1, 1)
     plt.scatter(delays, areas)
@@ -154,8 +158,11 @@ def areaDelay(width, tech, freq, config=None, special=None):
 # ending freq in 42 means fpu was turned off manually
 
 if __name__ == '__main__':
-    synthsintocsv()
+    # synthsintocsv()
     synthsfromcsv('Summary.csv')
-    freqPlot('tsmc28', 'rv64', 'gc')
-    areaDelay('rv32', 'tsmc28', 4200, config='gc')
-    areaDelay('rv32', 'tsmc28', 3042, special='')
\ No newline at end of file
+    freqPlot('tsmc28', 'rv32', 'e')
+    freqPlot('sky90', 'rv32', 'e')
+    areaDelay('tsmc28', testFreq[1], width= 'rv64', config='gc')
+    areaDelay('tsmc28', testFreq[1], special='')
+    areaDelay('sky90', testFreq[0], width='rv64', config='gc')
+    areaDelay('sky90', testFreq[0], special='')
\ No newline at end of file
diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py
index bf32b6f9b..99d70e813 100755
--- a/synthDC/wallySynth.py
+++ b/synthDC/wallySynth.py
@@ -8,20 +8,22 @@ def runCommand(config, tech, freq):
     command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq)
     subprocess.Popen(command, shell=True)
 
+testFreq = [3000, 10000]
+
 if __name__ == '__main__':
 
     techs = ['sky90', 'tsmc28']
-    bestAchieved = [750, 3000]
+    sweepCenter = [870, 3000]
     synthsToRun = []
 
-    
     arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
     for i in [0, 1]:
         tech = techs[i]
-        f = bestAchieved[i]
-        for freq in [round(f+f*x/100) for x in arr]: # rv32e freq sweep
+        sc = sweepCenter[i]
+        f = testFreq[i]
+        for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep
             synthsToRun += [['rv32e', tech, freq]]
-        for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic']: # configs
+        for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs
             synthsToRun += [[config, tech, f]]
         for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations
             config = 'rv64gc_' + mod

From 448c9fdbb90ca671fc337f11e161b3f8f7209f5d Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Mon, 27 Jun 2022 20:09:58 -0700
Subject: [PATCH 16/23] Add CLINT tests from book

---
 pipelined/testbench/tests.vh                  |   5 +-
 .../rv32i_m/privilege/Makefrag                |   1 +
 .../WALLY-clint-01.reference_output           |   9 ++
 .../rv32i_m/privilege/src/WALLY-clint-01.S    | 102 ++++++++++++++++++
 4 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S

diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index c17cef914..30b00cf48 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1601,6 +1601,9 @@ string wally32i[] = '{
 
  string wally32periph[] = '{
     `WALLYTEST,
-    "rv32i_m/privilege/WALLY-gpio-01"
+    "rv32i_m/privilege/WALLY-gpio-01",
+    "rv32i_m/privilege/WALLY-clint-01"
+    // "rv32i_m/privilege/WALLY-plic-01"
+    // "rv32i_m/privilege/WALLY-uart-01"
  };
 
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
index 5d98f81cc..56b3bc01f 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
@@ -54,6 +54,7 @@ target_tests_nosim = \
     WALLY-status-sie-01 \
     WALLY-status-tw-01 \
     WALLY-gpio-01 \
+    WALLY-clint-01 \
 
 
 rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output
new file mode 100644
index 000000000..013ef4604
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-clint-01.reference_output
@@ -0,0 +1,9 @@
+00000000 # msip zero on reset
+00000000 # mip is zero
+00000008 # mip msip bit is set
+00000000 # mip msip bit is reset
+00000000 # mip mtip bit is reset
+FFFFFFFF # mtimecmp is same as written value
+A5A5A5A5 # mtimecmph is same as written value
+00000000 # mip mtip is zero
+00000080 # mip mtip is set
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S
new file mode 100644
index 000000000..65f078b60
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S
@@ -0,0 +1,102 @@
+///////////////////////////////////////////
+//
+// WALLY-clint
+//
+// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
+//
+// Created 2022-06-16
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+#include "WALLY-TEST-LIB-32.h" 
+
+INIT_TESTS
+
+TRAP_HANDLER m
+
+j run_test_loop // begin test loop/table tests instead of executing inline code.
+
+INIT_TEST_TABLE
+
+END_TESTS
+
+TEST_STACK_AND_DATA
+
+.align 2
+test_cases:
+# ---------------------------------------------------------------------------------------------
+# Test Contents
+#
+#   Here is where the actual tests are held, or rather, what the actual tests do.
+#   each entry consists of 3 values that will be read in as follows:
+#   
+#   '.4byte [x28 Value], [x29 Value], [x30 value]'
+#                     or
+#   '.4byte [address], [value], [test type]'
+#
+#   The encoding for x30 test type values can be found in the test handler in the framework file
+# 
+# ---------------------------------------------------------------------------------------------
+
+# =========== Define CLINT registers ===========
+
+.equ CLINT, 0x02000000
+.equ msip, (CLINT+0x00)
+.equ mtimecmp, (CLINT+0x4000)   # doesn't necessarily reset to zero
+.equ mtimecmph,(CLINT+0x4004)
+.equ mtime, (CLINT+0xBFF8)      # resets to zero but cannot be easily tested
+.equ mtimeh, (CLINT+0xBFFC)
+
+# =========== Verify verifiable registers reset to zero ===========
+
+.4byte msip, 0x00000000, read32_test    # msip reset to zero
+
+# =========== msip tests ===========
+
+.4byte msip, 0xFFFFFFFE, write32_test   # write to invalid bits of msip
+.4byte 0x0, 0x00000000, readmip_test    # msip bit should be zero
+.4byte msip, 0x00000001, write32_test   # set msip to one
+.4byte 0x0, 0x00000008, readmip_test    # msip bit is set  
+.4byte msip, 0x00000000, write32_test   # set msip to zero
+.4byte 0x0, 0x00000000, readmip_test    # msip bit is released
+
+# =========== mtime write tests ===========
+
+.4byte mtime, 0x00000000, write32_test  # test we can write to mtime
+.4byte mtimeh, 0x00000000, write32_test # test we can write to mtimeh
+.4byte 0x0,0x00000000, readmip_test     # mtip bit should be zero
+
+# =========== mtimecmp tests ===========
+
+.4byte mtimecmp, 0xFFFFFFFF, write32_test   # verify mtimecmp is writable
+.4byte mtimecmph, 0xA5A5A5A5, write32_test  # verify mtimecmph is writable
+.4byte mtimecmp, 0xFFFFFFFF, read32_test    # read back value written to mtimecmp
+.4byte mtimecmph, 0xA5A5A5A5, read32_test   # read back value written to mtimecmph
+.4byte mtime, 0xFFFFFFFF, write32_test      # write to mtime
+.4byte 0x0, 0x00000000, readmip_test        # mtip should still be zero
+.4byte mtimeh, 0xA5A5A5A6, write32_test     # cause mtip to go high by making mtime > mtimecmp
+.4byte 0x0, 0x00000080, readmip_test        # mtip should be set
+
+
+# =========== Experimental mtime counting test ===========
+
+# .4byte mtimecmph, 0xFFFFFFFF, write32_test  # make sure mtip isn't set until ready
+# .4byte mtimeh, 0x0FFFFFFF, write32_test     # write near max value to mtimeh
+# .4byte mtime, 0x00000000, write32_test      # write small value to mtime
+# .4byte 0x0, 0x000000000, readmip_test       # mtip should be zero
+# .4byte mtimecmp, 0x00000001, write32_test   # write slightly larger value than mtime to test mtime counting
+# .4byte mtimecmph, 0x0FFFFFFF, write32_test  # write same value as mtimeh to test mtime counting
+# .4byte 0x0, 0x00000080, readmip_test        # mtip should be set since it has been at least two cycles

From 5ef1266d7608604cdba2bfc9686431c9893d48d3 Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Mon, 27 Jun 2022 20:16:29 -0700
Subject: [PATCH 17/23] Added termination line to CLINT test

---
 .../riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S      | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S
index 65f078b60..7cfd83c1a 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S
@@ -90,6 +90,7 @@ test_cases:
 .4byte mtimeh, 0xA5A5A5A6, write32_test     # cause mtip to go high by making mtime > mtimecmp
 .4byte 0x0, 0x00000080, readmip_test        # mtip should be set
 
+.4byte 0x0, 0x0, terminate_test # terminate tests
 
 # =========== Experimental mtime counting test ===========
 

From 478a2e2a4b91e4a22865ada376758660d9c474da Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 28 Jun 2022 18:01:11 +0000
Subject: [PATCH 18/23] removed an adder out of early termination

---
 addins/riscv-arch-test      | 2 +-
 pipelined/srt/srt-radix4.sv | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index 307c77b26..be67c99bd 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index 179fbf45a..39432c9e3 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -143,12 +143,13 @@ module earlytermination(
  
    logic [$clog2(`DIVLEN/2+3)-1:0]  Count;
    logic WZero;
+   logic [`DIVLEN+3:0] W;
 
-   assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary
-   // *** rather than Counting should just be able to check if one of the two msbs of the quotent is 1 then stop???
+  assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
   assign DivDone = (DivStickyE | WZero);
   assign DivStickyE = ~|Count;
-  assign DivNegStickyE = $signed(WS+WC) < 0;
+  assign W = WC+WS;
+  assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
   assign EarlyTermShiftDiv2E = Count;
   // +1 for setup
   // `DIVLEN/2 to get required number of bits

From 6baded9121a570b3660fd86325258dd3c6b1b5e3 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 28 Jun 2022 21:33:31 +0000
Subject: [PATCH 19/23] added rv32 double precision stores - untested

---
 pipelined/src/cache/cache.sv              | 13 ++++++++++---
 pipelined/src/cache/cacheway.sv           | 11 +++++++++--
 pipelined/src/fpu/fctrl.sv                |  6 +++---
 pipelined/src/fpu/fpu.sv                  | 23 ++++++++++++++++++-----
 pipelined/src/ieu/datapath.sv             | 10 ++++++++--
 pipelined/src/ifu/ifu.sv                  |  2 +-
 pipelined/src/lsu/lsu.sv                  |  8 +++++---
 pipelined/src/lsu/subwordread.sv          | 14 +++++++-------
 pipelined/src/wally/wallypipelinedcore.sv | 11 ++++++++---
 9 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index 2374b4938..d380bfc83 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -43,6 +43,9 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   input logic [`PA_BITS-1:0]  PAdr, // physical address
   input logic [(`XLEN-1)/8:0] ByteMask,
   input logic [`XLEN-1:0]     FinalWriteData,
+  input logic [`FLEN-1:0]     FWriteDataM,
+  input logic                        FLoad2,
+  input logic                 FpLoadStoreM,
   output logic                CacheCommitted,
   output logic                CacheStall,
    // to performance counters to cpu
@@ -120,7 +123,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
 
   // Array of cache ways, along with victim, hit, dirty, and read merging logic
   cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) 
-    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
+    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
     .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
     .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, 
     .Invalidate(InvalidateCacheM));
@@ -159,8 +162,12 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
   /////////////////////////////////////////////////////////////////////////////////////////////
-  mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
-		.d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
+  if (`LLEN>`XLEN)
+    mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
+      .d1({WORDSPERLINE/2{FWriteDataM}}),	.d2(CacheBusWriteData),	.s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
+  else
+    mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
+      .d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
   mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
 		.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}),
 		.d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}),
diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv
index d9a478612..ac1e26e8f 100644
--- a/pipelined/src/cache/cacheway.sv
+++ b/pipelined/src/cache/cacheway.sv
@@ -38,6 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   input logic [$clog2(NUMLINES)-1:0] RAdr,
   input logic [`PA_BITS-1:0]         PAdr,
   input logic [LINELEN-1:0]          CacheWriteData,
+  input logic                        FLoad2,
   input logic                        SetValidWay,
   input logic                        ClearValidWay,
   input logic                        SetDirtyWay,
@@ -74,8 +75,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Write Enable demux
   /////////////////////////////////////////////////////////////////////////////////////////////
-  onehotdecoder #(LOGWPL) adrdec(
-    .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
+  if(`LLEN>`XLEN)begin 
+    logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
+    onehotdecoder #(LOGWPL) adrdec(
+      .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
+    assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
+  end else
+    onehotdecoder #(LOGWPL) adrdec(
+      .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
   // If writing the whole line set all write enables to 1, else only set the correct word.
   assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
   assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 60d260027..f6ed650af 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -33,8 +33,8 @@ module fctrl (
                     default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                   endcase
       7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
+                    3'b010:  ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsw
+                    3'b011:  ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0; // fsd
                     default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
                   endcase
       7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
@@ -121,7 +121,7 @@ module fctrl (
       assign FmtD = 0;
     else if (`FPSIZES == 2)begin
       logic [1:0] FmtTmp;
-      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
       assign FmtD = (`FMT == FmtTmp);
     end
     else if (`FPSIZES == 3|`FPSIZES == 4)
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index aba1a8f48..25b39d69b 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -41,10 +41,12 @@ module fpu (
   input logic [4:0] 	   RdM, RdW, // which FP register to write to (from IEU)
   input logic [1:0]        STATUS_FS, // Is floating-point enabled?
   output logic 		   FRegWriteM, // FP register write enable
-  output logic 		   FpLoadM, // Fp load instruction?
+  output logic 		   FpLoadStoreM, // Fp load or store instruction?
+  output logic              FLoad2,
   output logic 		   FStallD, // Stall the decode stage
   output logic 		   FWriteIntE, // integer register write enables
   output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
+  output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
   output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
   output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
   output logic [1:0]       FResSelW,
@@ -292,8 +294,19 @@ module fpu (
    // data to be stored in memory - to IEU
    //    - FP uses NaN-blocking format
    //        - if there are any unsused bits the most significant bits are filled with 1s
-   if (`FLEN>`XLEN) assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
-   else assign FWriteDataE = {{`XLEN-`FLEN{FSrcYE[`FLEN-1]}}, FSrcYE}; 
+   if (`LLEN==`XLEN) begin
+      assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
+   end else begin
+      logic [`FLEN-1:0] FWriteDataE;
+      if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
+      else assign FLoad2 = FmtM;
+
+      if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
+      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
+      else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
+
+      flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
+   end
 
    // NaN Block SrcA
    generate
@@ -311,7 +324,7 @@ module fpu (
    assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
 
    // select the result that may be written to the integer register - to IEU
-   if (`FLEN>`XLEN) 
+   if (`FLEN>`XLEN)
       assign IntSrcXE = FSrcXE[`XLEN-1:0];
    else 
       assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
@@ -356,7 +369,7 @@ module fpu (
    //          |||         |||
    //////////////////////////////////////////////////////////////////////////////////////////
 
-   assign FpLoadM = FResSelM[1];
+   assign FpLoadStoreM = FResSelM[1];
 
    postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
                            .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv
index b7a6a9644..df711695e 100644
--- a/pipelined/src/ieu/datapath.sv
+++ b/pipelined/src/ieu/datapath.sv
@@ -124,12 +124,18 @@ module datapath (
   flopenrc #(5)     RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
 
   // floating point interactions: fcvt, fp stores
-  if (`F_SUPPORTED) begin:fpmux
+  if (`F_SUPPORTED&(`LLEN>`XLEN)) begin:fpmux
+    logic [`XLEN-1:0] IFCvtResultW;
+    mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
+    assign WriteDataE = ForwardedSrcBE;
+    mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
+    mux5  #(`XLEN)  resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); 
+  end else if (`F_SUPPORTED) begin:fpmux
     logic [`XLEN-1:0] IFCvtResultW;
     mux2  #(`XLEN)  resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
     mux2  #(`XLEN)  writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
     mux2  #(`XLEN)  cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
-    mux5  #(`XLEN)    resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
+    mux5  #(`XLEN)  resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW); 
   end else begin:fpmux
     assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
     mux5  #(`XLEN)    resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);	 
diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv
index 29d07cc2c..02e748f31 100644
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@@ -227,7 +227,7 @@ module ifu (
       icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
              .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
              .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), 
-             .CacheFetchLine(ICacheFetchLine),
+             .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
              .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
              .Cacheable(CacheableF),
              .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index 7234a7cac..5c56b1356 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -57,7 +57,9 @@ module lsu (
    input logic              BigEndianM,
    input logic              sfencevmaM,
    // fpu
-   input logic              FpLoadM,
+   input logic [`FLEN-1:0]  FWriteDataM,
+   input logic              FLoad2,
+   input logic              FpLoadStoreM,
    // faults
    output logic             LoadPageFaultM, StoreAmoPageFaultM,
    output logic             LoadMisalignedFaultM, LoadAccessFaultM,
@@ -235,7 +237,7 @@ module lsu (
               .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
         .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
         .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), 
-        .ByteMask(ByteMaskM), .WordCount,
+        .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
         .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
         .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
         .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), 
@@ -269,7 +271,7 @@ module lsu (
   subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
     .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
   subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
-		.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
+		.FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
 
   /////////////////////////////////////////////////////////////////////////////////////////////
   // MW Pipeline Register
diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv
index 4a6d99bfc..d38595d49 100644
--- a/pipelined/src/lsu/subwordread.sv
+++ b/pipelined/src/lsu/subwordread.sv
@@ -35,7 +35,7 @@ module subwordread
    input logic [`LLEN-1:0] 	ReadDataWordMuxM,
    input logic [2:0] 		LSUPAdrM,
    input logic [2:0] 		Funct3M,
-   input logic          FpLoadM, 
+   input logic          FpLoadStoreM, 
    output logic [`LLEN-1:0] ReadDataM
    );
 
@@ -83,16 +83,16 @@ module subwordread
     case(Funct3M)
       3'b000:  ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM};                              // lb
       3'b001:  if(`ZFH_SUPPORTED) 
-                    ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
+                    ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
                else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]};         // lh 
       3'b010:  if(`F_SUPPORTED) 
-                    ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]};         // lw/flw
+                    ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};         // lw/flw
                else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]};                 // lw
       3'b011:  if(`D_SUPPORTED) 
-                    ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]};   // ld/fld
+                    ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};   // ld/fld
                else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]};           // ld/fld
       3'b100:    if(`Q_SUPPORTED) 
-                    ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
+                    ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
                  else 
                     ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]};    // lbu
       3'b101:  ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]};   // lhu
@@ -122,10 +122,10 @@ module subwordread
     case(Funct3M)
       3'b000:  ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM};                              // lb
       3'b001:  if(`ZFH_SUPPORTED) 
-                    ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
+                    ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
                else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]};         // lh 
       3'b010:  if(`F_SUPPORTED) 
-                    ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]};         // lw/flw
+                    ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]};         // lw/flw
                else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]};                 // lw
       3'b011:  ReadDataM = ReadDataWordMuxM;                      // fld
       3'b100:  ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]};         // lbu
diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv
index b3f11680b..8ef8ec18b 100644
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@@ -92,13 +92,15 @@ module wallypipelinedcore (
   logic             FStallD;
   logic             FWriteIntE;
   logic [`XLEN-1:0]         FWriteDataE;
+  logic                     FLoad2;
+  logic [`FLEN-1:0]         FWriteDataM;
   logic [`XLEN-1:0]         FIntResM;  
   logic [`XLEN-1:0]         FCvtIntResW;  
   logic             FDivBusyE;
   logic             IllegalFPUInstrD, IllegalFPUInstrE;
   logic             FRegWriteM;
   logic             FPUStallD;
-  logic             FpLoadM;
+  logic             FpLoadStoreM;
   logic [1:0]       FResSelW;
   logic [4:0]             SetFflagsM;
 
@@ -253,7 +255,8 @@ module wallypipelinedcore (
   .AtomicM, .TrapM,
   .CommittedM, .DCacheMiss, .DCacheAccess,
   .SquashSCW,            
-  .FpLoadM,
+  .FpLoadStoreM,
+  .FWriteDataM, .FLoad2,
   //.DataMisalignedM(DataMisalignedM),
   .IEUAdrE, .IEUAdrM, .WriteDataE,
   .ReadDataW, .FlushDCacheM,
@@ -391,10 +394,12 @@ module wallypipelinedcore (
          .RdM, .RdW, // which FP register to write to (from IEU)
          .STATUS_FS, // is floating-point enabled?
          .FRegWriteM, // FP register write enable
-         .FpLoadM,
+         .FpLoadStoreM,
+         .FLoad2,
          .FStallD, // Stall the decode stage
          .FWriteIntE, // integer register write enable
          .FWriteDataE, // Data to be written to memory
+         .FWriteDataM, // Data to be written to memory
          .FIntResM, // data to be written to integer register
          .FCvtIntResW, // fp -> int conversion result to be stored in int register
          .FResSelW,   // fpu result selection

From 50b9b4557c923cc06d21a4b038f824f125da3596 Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Wed, 29 Jun 2022 12:23:40 -0700
Subject: [PATCH 20/23] added changes to testbench, tests and riscof for
 additional riscof compatibility

---
 pipelined/testbench/testbench.sv           |   6 +-
 pipelined/testbench/tests.vh               | 119 ++++++++++-----------
 tests/riscof/Makefile                      |  32 ++++--
 tests/riscof/sail_cSim/riscof_sail_cSim.py |   2 +-
 tests/riscof/spike/spike_rv32imc_isa.yaml  |   9 +-
 5 files changed, 94 insertions(+), 74 deletions(-)

diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index 4a6874c44..1f4f70a08 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -68,6 +68,7 @@ logic [3:0] dummy;
   integer   	ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
 
   logic 	    DCacheFlushDone, DCacheFlushStart;
+  logic riscofTest; 
     
   flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
   flopenr  #(32)   InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW,  dut.core.ifu.InstrM, InstrW);
@@ -174,6 +175,8 @@ logic [3:0] dummy;
       totalerrors = 0;
       testadr = 0;
       testadrNoBase = 0;
+      // riscof tests have a different signature, tests[0] == "1" refers to RiscvArchTests and  tests[0] == "2" refers to WallyRiscvArchTests 
+      riscofTest = tests[0] == "1"; // | tests[0] == "2"; 
       // fill memory with defined values to reduce Xs in simulation
       // Quick note the memory will need to be initialized.  The C library does not
       //  guarantee the  initialized reads.  For example a strcmp can read 6 byte
@@ -250,8 +253,7 @@ logic [3:0] dummy;
           for(i=0; i<SIGNATURESIZE; i=i+1) begin
             sig32[i] = 'bx;
           end
-          // riscof tests have a different signature, tests[0] == "1" refers to RISCVARCHTESTs
-          if (tests[0] == "1") signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
+          if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
           else signame = {pathname, tests[test], ".signature.output"};
           // read signature, reformat in 64 bits if necessary
           $readmemh(signame, sig32);
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index c17cef914..39305469d 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -33,8 +33,8 @@
 
 string tvpaths[] = '{
     "../../addins/imperas-riscv-tests/work/",
-    "../../tests/riscof/work/",
-    "../../tests/wally-riscv-arch-test/work/",
+    "../../tests/riscof/work/riscv-arch-test/",
+    "../../tests/riscof/work/wally-riscv-arch-test/",
     "../../tests/imperas-riscv-tests/work/",
     "../../benchmarks/riscv-coremark/work/",
     "../../addins/embench-iot/"
@@ -95,16 +95,16 @@ string tvpaths[] = '{
 
   string wally64a[] = '{
     `WALLYTEST,
-    "rv64i_m/privilege/WALLY-amo",
-    "rv64i_m/privilege/WALLY-lrsc",
-    "rv64i_m/privilege/WALLY-status-fp-enabled-01"
+    "rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
   };
 
     string wally32a[] = '{
     `WALLYTEST,
-    "rv32i_m/privilege/WALLY-amo",
-    "rv32i_m/privilege/WALLY-lrsc",
-    "rv32i_m/privilege/WALLY-status-fp-enabled-01"
+    "rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
 
   };
 
@@ -1490,41 +1490,40 @@ string imperas32f[] = '{
 
  string wally64i[] = '{
     `WALLYTEST,
-    "rv64i_m/I/WALLY-ADD",
-    "rv64i_m/I/WALLY-SLT",
-    "rv64i_m/I/WALLY-SLTU",
-    "rv64i_m/I/WALLY-SUB",
-    "rv64i_m/I/WALLY-XOR"
+    "rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
+    "rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
+    "rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
+    "rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
+    "rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
  };
 
  string wally64priv[] = '{
     `WALLYTEST,
-    "rv64i_m/privilege/WALLY-status-tw-01",
-    "rv64i_m/privilege/WALLY-csr-permission-s-01",
-    "rv64i_m/privilege/WALLY-csr-permission-u-01",
-    "rv64i_m/privilege/WALLY-minfo-01",
-    "rv64i_m/privilege/WALLY-misa-01",
-    "rv64i_m/privilege/WALLY-mmu-sv39",
-    "rv64i_m/privilege/WALLY-mmu-sv48",
-    "rv64i_m/privilege/WALLY-pma",
-    "rv64i_m/privilege/WALLY-pmp",
-    "rv64i_m/privilege/WALLY-trap-01",
-    "rv64i_m/privilege/WALLY-trap-s-01",
-    "rv64i_m/privilege/WALLY-trap-u-01",
-    "rv64i_m/privilege/WALLY-mie-01",
-    "rv64i_m/privilege/WALLY-sie-01",
-    "rv64i_m/privilege/WALLY-mtvec-01",
-    "rv64i_m/privilege/WALLY-stvec-01",
-    "rv64i_m/privilege/WALLY-status-mie-01",
-    "rv64i_m/privilege/WALLY-status-sie-01",
-    "rv64i_m/privilege/WALLY-trap-sret-01",
-    "rv64i_m/privilege/WALLY-status-tw-01",
-    "rv64i_m/privilege/WALLY-wfi-01"
+    "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
+    "rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
  };
 
  string wally64periph[] = '{
     `WALLYTEST,
-    "rv64i_m/privilege/WALLY-periph"
+    "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
  };
 
  string wally32e[] = '{
@@ -1569,38 +1568,38 @@ string imperas32f[] = '{
 
 string wally32i[] = '{
     `WALLYTEST,
-    "rv32i_m/I/WALLY-ADD",
-    "rv32i_m/I/WALLY-SLT",
-    "rv32i_m/I/WALLY-SLTU",
-    "rv32i_m/I/WALLY-SUB",
-    "rv32i_m/I/WALLY-XOR"
+    "rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
+    "rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
+    "rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
+    "rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
+    "rv32i_m/I/src/WALLY-XOR.S/ref/Ref" 
  };
 
  string wally32priv[] = '{
     `WALLYTEST,
-    "rv32i_m/privilege/WALLY-csr-permission-s-01",
-    "rv32i_m/privilege/WALLY-csr-permission-u-01",
-    "rv32i_m/privilege/WALLY-minfo-01",
-    "rv32i_m/privilege/WALLY-misa-01",
-    "rv32i_m/privilege/WALLY-mmu-sv32",
-    "rv32i_m/privilege/WALLY-pma",
-    "rv32i_m/privilege/WALLY-pmp",
-    "rv32i_m/privilege/WALLY-trap-01",
-    "rv32i_m/privilege/WALLY-trap-s-01",
-    "rv32i_m/privilege/WALLY-trap-u-01",
-    "rv32i_m/privilege/WALLY-mie-01",
-    "rv32i_m/privilege/WALLY-sie-01",
-    "rv32i_m/privilege/WALLY-mtvec-01",
-    "rv32i_m/privilege/WALLY-stvec-01",
-    "rv32i_m/privilege/WALLY-status-mie-01",
-    "rv32i_m/privilege/WALLY-status-sie-01",
-    "rv32i_m/privilege/WALLY-trap-sret-01",
-    "rv32i_m/privilege/WALLY-status-tw-01", 
-    "rv32i_m/privilege/WALLY-wfi-01"
+    "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
+    "rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
  };
 
  string wally32periph[] = '{
     `WALLYTEST,
-    "rv32i_m/privilege/WALLY-gpio-01"
+    "rv32i_m/privilege/src/WALLY-gpio-01.S/ref/Ref"
  };
 
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index dc121f4f0..621a5b54b 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -1,20 +1,40 @@
 arch_dir = ../../addins/riscv-arch-test
+wally_dir = ../wally-riscv-arch-test
 work_dir = ./riscof_work
+work = ./work
+arch_workdir = $(work)/riscv-arch-test
+wally_workdir = $(work)/wally-riscv-arch-test
+
 current_dir = $(shell pwd)
 XLEN    ?= 64
 
-all: build
+all: root build_arch build_wally memfile
 
-build:
+root:
 	mkdir -p $(work_dir)
-	mkdir -p work
+	mkdir -p $(work)
+	mkdir -p $(arch_workdir)
+	mkdir -p $(wally_workdir)
 	sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
+
+build_arch:
 	riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
-	rm -rf work/rv$(XLEN)i_m
-	mv -f $(work_dir)/rv$(XLEN)i_m work/
+	rm -rf $(arch_workdir)/rv$(XLEN)i_m
+	mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/
+
+build_wally:
+	riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run
+	rm -rf $(wally_workdir)/rv$(XLEN)i_m
+	mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/
+
+memfile: 
+	find $(work) -type f -name "*.elf" | grep "rv64i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done
+	find $(work) -type f -name "*.elf" | grep "rv32i_m" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
+	find $(work) -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
 
 clean:
 	rm -f config64.ini
 	rm -f config32.ini
 	rm -rf $(work_dir)
-	rm -rf work
\ No newline at end of file
+	rm -rf $(wally_workdir)
+	rm -rf $(arch_workdir)
\ No newline at end of file
diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py
index 7a7d16afb..b86f62b55 100644
--- a/tests/riscof/sail_cSim/riscof_sail_cSim.py
+++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py
@@ -101,7 +101,7 @@ class sail_cSim(pluginTemplate):
             execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
             sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
 
-            execute += self.sail_exe[self.xlen] + ' --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
+            execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)
 
             cov_str = ' '
             for label in testentry['coverage_labels']:
diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml
index 644e97316..5a76fd978 100644
--- a/tests/riscof/spike/spike_rv32imc_isa.yaml
+++ b/tests/riscof/spike/spike_rv32imc_isa.yaml
@@ -1,11 +1,11 @@
 hart_ids: [0]
 hart0:
-  ISA: RV32IMFCZicsr_Zifencei
+  ISA: RV32IMAFCZicsr_Zifencei
   physical_addr_sz: 32
   User_Spec_Version: '2.3'
   supported_xlen: [32]
   misa:
-   reset-val: 0x40001124
+   reset-val: 0x40001125
    rv32:
      accessible: true
      mxl:
@@ -23,7 +23,6 @@ hart0:
            warl:
               dependency_fields: []
               legal:
-                - extensions[25:0] bitmask [0x0001124, 0x0000000]
+                - extensions[25:0] bitmask [0x0001125, 0x0000000]
               wr_illegal:
-                - Unchanged
- 
+                - Unchanged
\ No newline at end of file

From 11956d0661f59951e9406b4734524cef6be0f18d Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Wed, 29 Jun 2022 13:40:11 -0700
Subject: [PATCH 21/23] ./regression-wally -buildroot or ./regression-wally
 -all now builds Linux from instruction 0 instead of trying to reach
 instruction 246000000

---
 pipelined/regression/regression-wally | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally
index 07058241d..821246a5f 100755
--- a/pipelined/regression/regression-wally
+++ b/pipelined/regression/regression-wally
@@ -44,16 +44,18 @@ configs = [
         grepstr="All lints run with no errors or warnings"
     )
 ]
-def getBuildrootTC(short):
+def getBuildrootTC(boot):
     INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
     MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
-    if short:
+    if boot:
+        name="buildrootboot"
+        BRcmd="vsim > {} -c <<!\ndo wally-pipelined.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
+        BRgrepstr="WallyHostname login:"
+    else:
+        name="buildroot"
         BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
         BRgrepstr=str(INSTR_LIMIT)+" instructions"
-    else:
-        BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV 0 1 0\n!"
-        BRgrepstr=str(MAX_EXPECTED)+" instructions"
-    return  TestCase(name="buildroot",variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
+    return  TestCase(name,variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
 
 tc = TestCase(
       name="buildroot-checkpoint",
@@ -136,14 +138,14 @@ def main():
         os.system('./make-tests.sh | tee ./logs/make-tests.log')
 
     if '-all' in sys.argv:
-        TIMEOUT_DUR = 30*3600 # seconds
-        configs.append(getBuildrootTC(short=False))
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs.append(getBuildrootTC(boot=True))
     elif '-buildroot' in sys.argv:
-        TIMEOUT_DUR = 30*3600 # seconds
-        configs=[getBuildrootTC(short=False)]
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs=[getBuildrootTC(boot=True)]
     else:
         TIMEOUT_DUR = 10*60 # seconds
-        configs.append(getBuildrootTC(short=True))
+        configs.append(getBuildrootTC(boot=False))
 
     # Scale the number of concurrent processes to the number of test cases, but
     # max out at a limited number of concurrent processes to not overwhelm the system

From 575b73fa8c591180ac48d937ee2e71ab2707d521 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 1 Jul 2022 14:55:46 -0700
Subject: [PATCH 22/23] some postprocessing cleanup

---
 pipelined/src/fpu/flags.sv         |   5 +-
 pipelined/src/fpu/lzacorrection.sv |  34 +++----
 pipelined/src/fpu/postprocess.sv   | 152 +++++++++++++++--------------
 pipelined/src/fpu/resultselect.sv  |  12 +--
 pipelined/src/fpu/round.sv         |  62 ++++++------
 5 files changed, 136 insertions(+), 129 deletions(-)

diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index ff6495dd9..a425ad9b4 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -4,7 +4,7 @@ module flags(
     input logic                 XSgnM,
     input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
     input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic Plus1,
+    input logic                 Plus1,
     input logic                 InfIn,                  // is a Inf input being used
     input logic                 XZeroM, YZeroM,         // inputs are zero
     input logic                 XNaNM, YNaNM,           // inputs are NaN
@@ -25,7 +25,7 @@ module flags(
     input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
     input logic                 Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
     output logic                DivByZero,
-    output logic                IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
+    output logic                IntInvalid, Invalid, Overflow, // flags used to select the res
     output logic [4:0]          PostProcFlgM // flags
 );
     logic               SigNaN;     // is an input a signaling NaN
@@ -34,6 +34,7 @@ module flags(
     logic               IntInexact; // integer inexact flag
     logic               FmaInvalid; // integer invalid flag
     logic               DivInvalid; // integer invalid flag
+    logic               Underflow;   // Underflow flag
     logic               ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
     logic               ShiftGtIntSz; // is the shift greater than the the integer size (use ResExp to account for possible roundning "shift")
 
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv
index e5a2d5c34..a7a8143eb 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@@ -1,24 +1,24 @@
 `include "wally-config.vh"
 
 module lzacorrection(
-    input logic  [`NORMSHIFTSZ-1:0]     Shifted,         // the shifted sum before LZA correction
-    input logic                         FmaOp,
-    input logic                         DivOp,
-    input logic DivResDenorm,
-    input logic  [`NE+1:0] DivCalcExpM,
-    input logic [`NE+1:0] DivDenormShift,
-    input logic  [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
-    input logic                         PreResultDenorm,    // is the result denormalized - calculated before LZA corection
-    input logic                         KillProdM,  // is the product set to zero
-    input logic                         SumZero,
-    output logic  [`CORRSHIFTSZ-1:0]    CorrShifted,         // the shifted sum before LZA correction
-    output logic [`NE+1:0] CorrDivExp,
-    output logic [`NE+1:0]              SumExp         // exponent of the normalized sum
+    input logic  [`NORMSHIFTSZ-1:0] Shifted,         // the shifted sum before LZA correction
+    input logic                     FmaOp,
+    input logic                     DivOp,
+    input logic                     DivResDenorm,
+    input logic  [`NE+1:0]          DivCalcExpM,
+    input logic  [`NE+1:0]          DivDenormShift,
+    input logic  [`NE+1:0]          ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic                     PreResultDenorm,    // is the result denormalized - calculated before LZA correction
+    input logic                     KillProdM,  // is the product set to zero
+    input logic                     SumZero,
+    output logic [`CORRSHIFTSZ-1:0] CorrShifted,         // the shifted sum before LZA correction
+    output logic [`NE+1:0]          CorrDivExp,
+    output logic [`NE+1:0]          SumExp         // exponent of the normalized sum
 );
-    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
-    logic [`CORRSHIFTSZ:0]           CorrQuotShifted;
-    logic                        ResDenorm;    // is the result denormalized
-    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
+    logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
+    logic [`CORRSHIFTSZ:0] CorrQuotShifted;
+    logic                  ResDenorm;    // is the result denormalized
+    logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
     // LZA correction
     assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index ab06a9406..20cea2b61 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -30,101 +30,109 @@
 `include "wally-config.vh"
 
 module postprocess(
+    // general signals
     input logic                             XSgnM, YSgnM,  // input signs
-    input logic     [`NE-1:0]               ZExpM, // input exponents
-    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [`FMTBITS-1:0]          FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
-    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic  [`NE-1:0]                  ZExpM, // input exponents
+    input logic  [`NF:0]                    XManM, YManM, ZManM, // input mantissas
+    input logic  [2:0]                      FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [`FMTBITS-1:0]             FmtM,       // precision 1 = double 0 = single
+    input logic  [2:0]                      FOpCtrlM,       // choose which operation (look below for values)
     input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
     input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
     input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0]             SumM,       // the positive sum
+    input logic                             ZDenormM, // is the original precision denormalized
+    input logic  [1:0]                      PostProcSelM, // select result to be written to fp register
+    //fma signals
+    input logic  [`NE+1:0]                  ProdExpM,       // X exponent + Y exponent - bias
+    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic  [3*`NF+5:0]                SumM,       // the positive sum
     input logic                             NegSumM,    // was the sum negitive
     input logic                             InvZM,      // do you invert Z
-    input logic                             ZDenormM, // is the original precision denormalized
     input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
     input logic                             PSgnM,      // the product's sign
-    input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
-    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
-    input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
-    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic [`NE+1:0]           DivCalcExpM,    // the calculated expoent
-    input logic CvtResDenormUfM,
-    input logic DivStickyM,
-    input logic DivNegStickyM,
-	input logic [`LOGCVTLEN-1:0] CvtShiftAmtM,  // how much to shift by
-    input logic                   CvtResSgnM,     // the result's sign
-    input logic             FWriteIntM,     // is fp->int (since it's writting to the integer register)
-    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
-    input logic             IntZeroM,         // is the input zero
-    input logic [1:0] PostProcSelM, // select result to be written to fp register
-    input logic [`DIVLEN+2:0]   Quot,
-    output logic    [`FLEN-1:0]    PostProcResM,    // FMA final result
-    output logic    [4:0]          PostProcFlgM,
-    output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
+    input logic  [$clog2(3*`NF+7)-1:0]      FmaNormCntM,   // the normalization shift count
+    //divide signals
+    input logic  [$clog2(`DIVLEN/2+3)-1:0]  EarlyTermShiftDiv2M,
+    input logic  [`NE+1:0]                  DivCalcExpM,    // the calculated exponent
+    input logic                             DivStickyM,
+    input logic                             DivNegStickyM,
+    input logic  [`DIVLEN+2:0]              Quot,
+    // conversion signals
+    input logic  [`NE:0]                    CvtCalcExpM,    // the calculated exponent
+    input logic                             CvtResDenormUfM,
+	input logic  [`LOGCVTLEN-1:0]           CvtShiftAmtM,  // how much to shift by
+    input logic                             CvtResSgnM,     // the result's sign
+    input logic                             FWriteIntM,     // is fp->int (since it's writing to the integer register)
+    input logic  [`CVTLEN-1:0]              CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic                             IntZeroM,         // is the input zero
+    // final results
+    output logic [`FLEN-1:0]                PostProcResM,    // FMA final result
+    output logic [4:0]                      PostProcFlgM,
+    output logic [`XLEN-1:0]                FCvtIntResM    // the int conversion result
     );
    
-
-
-    logic [`NF-1:0]     ResFrac; // Result fraction
-    logic [`NE-1:0]     ResExp;  // Result exponent
-    logic  [`CORRSHIFTSZ-1:0]    CorrShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
-    logic [`NE+1:0]     FullResExp;  // ResExp with bits to determine sign and overflow
-    logic               SumZero;        // is the sum zero
-    logic               Sticky;           // Sticky bit
-    logic [3*`NF+8:0]            FmaShiftIn;        // is the sum zero
-    logic               UfPlus1;                    // do you add one (for determining underflow flag)
-    logic               Round;   // bits needed to determine rounding
-    logic [`CVTLEN+`NF:0]    CvtShiftIn;    // number to be shifted
-    logic               Mult;       // multiply opperation
-    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
-    logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt;   // normalization shift count
-    logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
-    logic [$clog2(`NORMSHIFTSZ)-1:0]  DivShiftAmt;
-    logic [`NORMSHIFTSZ-1:0]            ShiftIn;        // is the sum zero
-    logic [`NORMSHIFTSZ-1:0] DivShiftIn;
-    logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
-    logic                   Plus1;      // add one to the final result?
-    logic                   IntInvalid, Overflow, Underflow, Invalid; // flags
-    logic                   Signed;     // is the opperation with a signed integer?
-    logic                   Int64;      // is the integer 64 bits?
-    logic                   IntToFp;       // is the opperation an int->fp conversion?
-    logic                   ToInt;      // is the opperation an fp->int conversion?
+    // general signals
+    logic [`NF-1:0] ResFrac; // Result fraction
+    logic [`NE-1:0] ResExp;  // Result exponent
+    logic [`CORRSHIFTSZ-1:0] CorrShifted; // correctly shifted fraction
+    logic [`NE+1:0] FullResExp;  // ResExp with bits to determine sign and overflow
+    logic Sticky;           // Sticky bit
+    logic UfPlus1;                    // do you add one (for determining underflow flag)
+    logic Round;   // bits needed to determine rounding
+    logic [`FLEN:0] RoundAdd;       // how much to add to the result
+    logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt;   // normalization shift count
+    logic [`NORMSHIFTSZ-1:0] ShiftIn;        // number to be shifted
+    logic [`NORMSHIFTSZ-1:0] Shifted;    // the shifted result
+    logic Plus1;      // add one to the final result?
+    logic IntInvalid, Overflow, Invalid; // flags
     logic [`NE+1:0] RoundExp;
-    logic [`NE+1:0] CorrDivExp;
-    logic [1:0] NegResMSBS;
-    logic CvtOp;
-    logic FmaOp;
-    logic CvtResUf;
-    logic DivOp;
-    logic InfIn;
     logic ResSgn;
     logic RoundSgn;
-    logic NaNIn;
-    logic DivByZero;
     logic UfLSBRes;
-    logic Sqrt;
     logic [`FMTBITS-1:0] OutFmt;
+    // fma signals
+    logic [`NE+1:0] SumExp;     // exponent of the normalized sum
+    logic SumZero;        // is the sum zero
+    logic [3*`NF+8:0] FmaShiftIn;        // fma number to be shifted
+    logic [`NE+1:0] ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic PreResultDenorm;    // is the result denormalized - calculated before LZA correction
+    logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
+    // division signals
+    logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
+    logic [`NORMSHIFTSZ-1:0] DivShiftIn;
+    logic [`NE+1:0] CorrDivExp;
+    logic DivByZero;
     logic DivResDenorm;
     logic [`NE+1:0] DivDenormShift;
+    // conversion signals
+    logic [`CVTLEN+`NF:0] CvtShiftIn;    // number to be shifted
+    logic [1:0] NegResMSBS;
+    logic CvtResUf;
+    // readability signals
+    logic Mult;       // multiply operation
+    logic Int64;      // is the integer 64 bits?
+    logic Signed;     // is the operation with a signed integer?
+    logic IntToFp;       // is the operation an int->fp conversion?
+    logic ToInt;      // is the operation an fp->int conversion?
+    logic CvtOp;
+    logic FmaOp;
+    logic DivOp;
+    logic InfIn;
+    logic NaNIn;
+    logic Sqrt;
 
     // signals to help readability
-    assign Signed = FOpCtrlM[0];
-    assign Int64 =  FOpCtrlM[1];
-    assign IntToFp =   FOpCtrlM[2];
-    assign ToInt =  FWriteIntM;
+    assign Signed =  FOpCtrlM[0];
+    assign Int64 =   FOpCtrlM[1];
+    assign IntToFp = FOpCtrlM[2];
+    assign ToInt =   FWriteIntM;
     assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
     assign CvtOp = (PostProcSelM == 2'b00);
     assign FmaOp = (PostProcSelM == 2'b10);
     assign DivOp = (PostProcSelM == 2'b01);
-    assign Sqrt = FOpCtrlM[0];
+    assign Sqrt =  FOpCtrlM[0];
 
     // is there an input of infinity or NaN being used
     assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
@@ -205,7 +213,7 @@ module postprocess(
                 .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
                 .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero,
                 .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
-                .RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
+                .RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Select the result
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index d6d15e46f..50ef1b6be 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -4,29 +4,27 @@ module resultselect(
     input logic                     XSgnM,        // input signs
     input logic     [`NE-1:0]       ZExpM, // input exponents
     input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
+    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic     [2:0]           FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic     [`FMTBITS-1:0]  OutFmt,       // output format
     input logic                     InfIn,
-    input logic                     XInfM,
-    input logic                     YInfM,
-    input logic                     DivOp,
-    input logic                     XZeroM,
+    input logic                     XInfM, YInfM,
+    input logic                     XZeroM, ZZeroM,
     input logic                     IntZeroM,
     input logic                     NaNIn,
     input logic                     IntToFp,
     input logic                     Int64,
     input logic                     Signed,
     input logic                     CvtOp,
-    input logic [`NORMSHIFTSZ-1:0]             Shifted,        // is the sum zero
+    input logic                     DivOp,
     input logic                     FmaOp,
+    input logic [`NORMSHIFTSZ-1:0]  Shifted,        // the shifted result
     input logic                     Plus1,
     input logic                     DivByZero,
     input logic [`NE:0]             CvtCalcExpM,    // the calculated expoent
     input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
     input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic                     ZDenormM, // is the original precision denormalized
-    input logic 		            ZZeroM,
     input logic                     ResSgn,  // the res's sign
     input logic     [`FLEN:0]       RoundAdd,   // how much to add to the res
     input logic                     IntInvalid, Invalid, Overflow,  // flags
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 7d4153118..532e17290 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -8,42 +8,42 @@
 `define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
 
 module round(
-    input logic  [`FMTBITS-1:0] OutFmt,       // precision 1 = double 0 = single
-    input logic  [2:0]          FrmM,       // rounding mode
-    input logic                 FmaOp,
-    input logic                 DivOp,
-    input logic [1:0] PostProcSelM,
-    input logic                 CvtResDenormUfM,
-    input logic                 ToInt,
-    input logic                 CvtOp,
-    input logic                 CvtResUf,
-    input logic [`CORRSHIFTSZ-1:0]  CorrShifted,
-    input logic                 AddendStickyM,  // addend's sticky bit
-    input logic                 ZZeroM,         // is Z zero
-    input logic                 InvZM,          // invert Z
-    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    input logic                 RoundSgn,      // the result's sign
-    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic [`NE+1:0]           CorrDivExp,    // the calculated expoent
-    input logic                DivStickyM,             // sticky bit
-    input logic DivNegStickyM,
-    output logic                UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
-    output logic [`NF-1:0]      ResFrac,         // Result fraction
-    output logic [`NE-1:0]      ResExp,          // Result exponent
-    output logic                Sticky,             // sticky bit
-    output logic [`NE+1:0] RoundExp,
-    output logic Plus1,
-    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
-    output logic                Round, UfLSBRes // bits needed to calculate rounding
+    input logic  [`FMTBITS-1:0]     OutFmt,       // precision 1 = double 0 = single
+    input logic  [2:0]              FrmM,       // rounding mode
+    input logic                     FmaOp,
+    input logic                     DivOp,
+    input logic                     CvtOp,
+    input logic                     ToInt,
+    input logic  [1:0]              PostProcSelM,
+    input logic                     CvtResDenormUfM,
+    input logic                     CvtResUf,
+    input logic  [`CORRSHIFTSZ-1:0] CorrShifted,
+    input logic                     AddendStickyM,  // addend's sticky bit
+    input logic                     ZZeroM,         // is Z zero
+    input logic                     InvZM,          // invert Z
+    input logic  [`NE+1:0]          SumExp,         // exponent of the normalized sum
+    input logic                     RoundSgn,      // the result's sign
+    input logic  [`NE:0]            CvtCalcExpM,    // the calculated exponent
+    input logic  [`NE+1:0]          CorrDivExp,    // the calculated exponent
+    input logic                     DivStickyM,             // sticky bit
+    input logic                     DivNegStickyM,
+    output logic                    UfPlus1,  // do you add or subtract one from the result
+    output logic [`NE+1:0]          FullResExp,      // ResExp with bits to determine sign and overflow
+    output logic [`NF-1:0]          ResFrac,         // Result fraction
+    output logic [`NE-1:0]          ResExp,          // Result exponent
+    output logic                    Sticky,             // sticky bit
+    output logic [`NE+1:0]          RoundExp,
+    output logic                    Plus1,
+    output logic [`FLEN:0]          RoundAdd,           // how much to add to the result
+    output logic                    Round, UfLSBRes // bits needed to calculate rounding
 );
     logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
     logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
     logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
-    logic                 NormSumSticky;  // normalized sum's sticky bit
-    logic                 UfSticky;   // sticky bit for underlow calculation
+    logic           NormSumSticky;  // normalized sum's sticky bit
+    logic           UfSticky;   // sticky bit for underflow calculation
     logic [`NF-1:0] RoundFrac;
-    logic FpRes, IntRes;
+    logic           FpRes, IntRes;
     logic           UfRound;
     logic           FpRound, FpLSBRes, FpUfRound;
     logic           CalcPlus1, FpPlus1;

From 8cc051915dcc650d439d4030a583ce9d9842c22c Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Fri, 1 Jul 2022 16:28:29 -0700
Subject: [PATCH 23/23] Fixed make error

---
 tests/riscof/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index 621a5b54b..830b9eef6 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
 current_dir = $(shell pwd)
 XLEN    ?= 64
 
-all: root build_arch build_wally memfile
+all: root build_arch # build_wally memfile
 
 root:
 	mkdir -p $(work_dir)