From 4c066c078fa07a6d25975348942f3c7ca451d23e Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Mon, 10 Jun 2024 07:38:03 -0700
Subject: [PATCH 1/4] Removing two unnecessary 0's from fmashiftcalc interface

---
 src/fpu/postproc/fmashiftcalc.sv | 20 +++++++++-----------
 src/fpu/postproc/postprocess.sv  |  9 ++++-----
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv
index 27f39e2a5..3a03aff8f 100644
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@@ -28,18 +28,17 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
-  input  logic [P.FMTBITS-1:0]         Fmt,                 // precision 1 = double 0 = single
-  input  logic [P.NE+1:0]              FmaSe,               // sum's exponent
-  input  logic [P.FMALEN-1:0]          FmaSm,               // the positive sum
+  input  logic [P.FMTBITS-1:0]          Fmt,                 // precision 1 = double 0 = single
+  input  logic [P.NE+1:0]               FmaSe,               // sum's exponent
+  input  logic [P.FMALEN-1:0]           FmaSm,               // the positive sum
   input  logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt,             // normalization shift count
-  output logic [P.NE+1:0]              NormSumExp,          // exponent of the normalized sum not taking into account Subnormal or zero results
-  output logic                         FmaSZero,            //  is the sum zero
-  output logic                         FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
-  output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt,         // normalization shift count
-  output logic [P.FMALEN+1:0]          FmaShiftIn           
+  output logic [P.NE+1:0]               NormSumExp,          // exponent of the normalized sum not taking into account Subnormal or zero results
+  output logic                          FmaSZero,            // is the sum zero
+  output logic                          FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
+  output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt          // normalization shift count
 );
-  logic [P.NE+1:0]                     PreNormSumExp;       // the exponent of the normalized sum with the P.FLEN bias
-  logic [P.NE+1:0]                     BiasCorr;            // correction for bias
+  logic [P.NE+1:0]                      PreNormSumExp;       // the exponent of the normalized sum with the P.FLEN bias
+  logic [P.NE+1:0]                      BiasCorr;            // correction for bias
 
   ///////////////////////////////////////////////////////////////////////////////
   // Normalization
@@ -130,7 +129,6 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
 
   // set and calculate the shift input and amount
   //  - shift once if killing a product and the result is subnormal
-  assign FmaShiftIn = {2'b0, FmaSm};
   if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1;
   else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
 endmodule
diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv
index 4e893a82e..20968dad7 100644
--- a/src/fpu/postproc/postprocess.sv
+++ b/src/fpu/postproc/postprocess.sv
@@ -44,7 +44,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   input logic                              FmaPs,               // the product's sign
   input logic                              FmaSs,               // Sum sign
   input logic  [P.NE+1:0]                  FmaSe,               // the sum's exponent
-  input logic  [P.FMALEN-1:0]                FmaSm,               // the positive sum
+  input logic  [P.FMALEN-1:0]              FmaSm,               // the positive sum
   input logic                              FmaASticky,          // sticky bit that is calculated during alignment
   input logic  [$clog2(P.FMALEN+1)-1:0]      FmaSCnt,             // the normalization shift count
   //divide signals
@@ -86,7 +86,6 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   // fma signals
   logic [P.NE+1:0]             FmaMe;                // exponent of the normalized sum
   logic                        FmaSZero;             // is the sum zero
-  logic [P.FMALEN+1:0]         FmaShiftIn;           // fma shift input
   logic [P.NE+1:0]             NormSumExp;           // exponent of the normalized sum not taking into account Subnormal or zero results
   logic                        FmaPreResultSubnorm;  // is the result subnormal - calculated before LZA corection
   logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt;          // normalization shift amount for fma
@@ -145,8 +144,8 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
       .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
 
-  fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
-      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
+  fmashiftcalc #(P) fmashiftcalc(.FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSm,
+      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt);
 
   divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
 
@@ -155,7 +154,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
     case(PostProcSel)
       2'b10: begin // fma
         ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt};
-        ShiftIn  =  {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}};
+        ShiftIn  =  {{2'b00, FmaSm}, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}};
       end
       2'b00: begin // cvt
         ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt};

From 3284dd21126da2166ba2aa6d91e2a893664cfa36 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Mon, 10 Jun 2024 07:45:03 -0700
Subject: [PATCH 2/4] Removed unnecessary Zero checking on FmaPreResultSubnorm

---
 src/fpu/postproc/fmashiftcalc.sv | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv
index 3a03aff8f..1d33f7337 100644
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@@ -78,19 +78,19 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
     assign NormSumExp = PreNormSumExp+BiasCorr;
   end
   
-  // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
+  // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen)
   if (P.FPSIZES == 1) begin
     logic Sum0LEZ, Sum0GEFL;
     assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
     assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-1)); // changed from -2 dh 4/3/24 for issue 655
-    assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+    assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL;
   end else if (P.FPSIZES == 2) begin
     logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
     assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
     assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-1)); // changed from -2 dh 4/3/24 for issue 655
     assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1));
     assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-1+P.BIAS-P.BIAS1)) | ~|PreNormSumExp;
-    assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
+    assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL);
   end else if (P.FPSIZES == 3) begin
     logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
     assign Sum0LEZ  = PreNormSumExp[P.NE+1] | ~|PreNormSumExp;
@@ -101,9 +101,9 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
     assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-1+P.BIAS-P.BIAS2)) | ~|PreNormSumExp;
     always_comb begin
       case (Fmt)
-        P.FMT: FmaPreResultSubnorm   = Sum0LEZ & Sum0GEFL; // & ~FmaSZero; // checking sum is not zero is harmless but turns out to be unnecessary
-        P.FMT1: FmaPreResultSubnorm  = Sum1LEZ & Sum1GEFL; // & ~FmaSZero;
-        P.FMT2: FmaPreResultSubnorm  = Sum2LEZ & Sum2GEFL; // & ~FmaSZero; 
+        P.FMT: FmaPreResultSubnorm   = Sum0LEZ & Sum0GEFL;
+        P.FMT1: FmaPreResultSubnorm  = Sum1LEZ & Sum1GEFL;
+        P.FMT2: FmaPreResultSubnorm  = Sum2LEZ & Sum2GEFL;
         default: FmaPreResultSubnorm = 1'bx;
       endcase
     end
@@ -119,10 +119,10 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
     assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-1+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp;
     always_comb begin
       case (Fmt)
-        2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
-        2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
-        2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
-        2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
+        2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL;
+        2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL;
+        2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL;
+        2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL;
       endcase
     end
   end

From e02c1008bc581684ef11902c1f3a54589f071bf8 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Mon, 10 Jun 2024 07:55:35 -0700
Subject: [PATCH 3/4] postprocessor shift amount simplification

---
 src/fpu/postproc/divshiftcalc.sv | 4 ----
 src/fpu/postproc/fmashiftcalc.sv | 4 ++--
 src/fpu/postproc/postprocess.sv  | 5 ++---
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv
index 0a222d724..d45afeea6 100644
--- a/src/fpu/postproc/divshiftcalc.sv
+++ b/src/fpu/postproc/divshiftcalc.sv
@@ -28,10 +28,8 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 module divshiftcalc import cvw::*;  #(parameter cvw_t P) (
-  input  logic [P.DIVb:0]              DivUm,              // divsqrt significand
   input  logic [P.NE+1:0]              DivUe,              // divsqrt exponent
   output logic [P.LOGNORMSHIFTSZ-1:0]  DivShiftAmt,        // divsqrt shift amount
-  output logic [P.NORMSHIFTSZ-1:0]     DivShiftIn,         // divsqrt shift input
   output logic                         DivResSubnorm,      // is the divsqrt result subnormal
   output logic                         DivSubnormShiftPos  // is the subnormal shift amount positive
 );
@@ -68,6 +66,4 @@ module divshiftcalc import cvw::*;  #(parameter cvw_t P) (
   assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0;
   assign DivShiftAmt        = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
 
-  // pre-shift the divider result for normalization
-  assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
 endmodule
diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv
index 1d33f7337..cf334aa9b 100644
--- a/src/fpu/postproc/fmashiftcalc.sv
+++ b/src/fpu/postproc/fmashiftcalc.sv
@@ -53,6 +53,7 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
   //convert the sum's exponent into the proper precision
   if (P.FPSIZES == 1) begin
     assign NormSumExp = PreNormSumExp;
+    assign BiasCorr = '0;
   end else if (P.FPSIZES == 2) begin
     assign BiasCorr = Fmt ? (P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS);
     assign NormSumExp = PreNormSumExp+BiasCorr;
@@ -129,6 +130,5 @@ module fmashiftcalc import cvw::*;  #(parameter cvw_t P) (
 
   // set and calculate the shift input and amount
   //  - shift once if killing a product and the result is subnormal
-  if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1;
-  else                assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
+  assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1;
 endmodule
diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv
index 20968dad7..2db03cb16 100644
--- a/src/fpu/postproc/postprocess.sv
+++ b/src/fpu/postproc/postprocess.sv
@@ -91,7 +91,6 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt;          // normalization shift amount for fma
   // division signals
   logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt;          // divsqrt shif amount
-  logic [P.NORMSHIFTSZ-1:0]    DivShiftIn;           // divsqrt shift input
   logic [P.NE+1:0]             Ue;                   // divsqrt corrected exponent after corretion shift
   logic                        DivByZero;            // divide by zero flag
   logic                        DivResSubnorm;        // is the divsqrt result subnormal
@@ -147,7 +146,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
   fmashiftcalc #(P) fmashiftcalc(.FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSm,
       .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt);
 
-  divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+  divshiftcalc #(P) divshiftcalc(.DivUe, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt);
 
   // select which unit's output to shift
   always_comb
@@ -162,7 +161,7 @@ module postprocess import cvw::*;  #(parameter cvw_t P) (
       end
       2'b01: begin //divsqrt
         ShiftAmt = DivShiftAmt;
-        ShiftIn  =  DivShiftIn;
+        ShiftIn  = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
       end
       default: begin 
         ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}}; 

From 29fe5983e266041fc39a88dadc3972380a1ef64c Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Tue, 11 Jun 2024 12:32:11 -0700
Subject: [PATCH 4/4] Fixed testfloat regression and added bitmanip/crypto
 variants

---
 bin/regression-wally | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/bin/regression-wally b/bin/regression-wally
index 4e72fae66..f85800bba 100755
--- a/bin/regression-wally
+++ b/bin/regression-wally
@@ -99,6 +99,29 @@ derivconfigtests = [
         ["zaamo_rv32gc", ["arch32i", "arch32a_amo"]],
         ["zalrsc_rv32gc", ["arch32i", "wally32a_lrsc"]],
 
+        # Bit manipulation and crypto variants
+        ["zba_rv32gc", ["arch32i", "arch32zba"]],
+        ["zbb_rv32gc", ["arch32i", "arch32zbb"]],
+        ["zbc_rv32gc", ["arch32i", "arch32zbc"]],
+        ["zbs_rv32gc", ["arch32i", "arch32zbs"]],
+        ["zbkb_rv32gc", ["arch32i", "arch32zbkb"]],
+        ["zbkc_rv32gc", ["arch32i", "arch32zbkc"]],
+        ["zbkx_rv32gc", ["arch32i", "arch32zbkx"]],
+        ["zkne_rv32gc", ["arch32i", "arch32zkne"]],
+        ["zknd_rv32gc", ["arch32i", "arch32zknd"]],
+        ["zknh_rv32gc", ["arch32i", "arch32zknh"]],
+
+        ["zba_rv64gc", ["arch64i", "arch64zba"]],
+        ["zbb_rv64gc", ["arch64i", "arch64zbb"]],
+        ["zbc_rv64gc", ["arch64i", "arch64zbc"]],
+        ["zbs_rv64gc", ["arch64i", "arch64zbs"]],
+        ["zbkb_rv64gc", ["arch64i", "arch64zbkb"]],
+        ["zbkc_rv64gc", ["arch64i", "arch64zbkc"]],
+        ["zbkx_rv64gc", ["arch64i", "arch64zbkx"]],
+        ["zkne_rv64gc", ["arch64i", "arch64zkne"]],
+        ["zknd_rv64gc", ["arch64i", "arch64zknd"]],
+        ["zknh_rv64gc", ["arch64i", "arch64zknh"]],
+
         ### add misaligned tests
 
         # fp/int divider permutations
@@ -325,7 +348,8 @@ else:
 
     # run derivative configurations in nightly regression
 if (nightly):
-    addTests(tests_buildrootboot, defaultsim)
+#    addTests(tests_buildrootboot, defaultsim)
+    addTests(tests_buildrootshort, defaultsim)
     addTests(derivconfigtests, defaultsim)
 else:
     addTests(tests_buildrootshort, defaultsim)
@@ -389,7 +413,7 @@ if (testfloat or nightly): # for nightly, run testfloat along with othres
             tc = TestCase(
                     name=test,
                     variant=config,
-                    cmd="wsim --tb testbench_fp --sim questa " + config + " " + test + " > " + sim_log,
+                    cmd="wsim --tb testbench_fp " + config + " " + test + " > " + sim_log,
                     grepstr="All Tests completed with          0 errors",
                     grepfile = WALLY + "/sim/questa/logs/"+config+"_"+test+".log")
             configs.append(tc)
@@ -415,7 +439,7 @@ def main():
     elif '--nightly' in sys.argv:
         TIMEOUT_DUR = 60*1440 # 1 day
     elif '--testfloat' in sys.argv:
-        TIMEOUT_DUR = 5*60 # seconds
+        TIMEOUT_DUR = 30*60 # seconds
     else:
         TIMEOUT_DUR = 10*60 # seconds