From 949f53695def9e808af913a6d028905424bc2277 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 1 Jun 2022 00:07:36 +0000
Subject: [PATCH 1/6] Fixed typos

---
 addins/riscv-arch-test |  2 +-
 pipelined/srt/srt.sv   | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index be67c99bd..307c77b26 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
+Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 835b1d1b6..2275b93ed 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -44,7 +44,7 @@ module srt #(parameter Nf=52) (
   input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
-  input  logic       Int, // Choose integer inputss
+  input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign,
   output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
@@ -52,7 +52,7 @@ module srt #(parameter Nf=52) (
   output logic [3:0] Flags
 );
 
-  logic          qp, qz, qm; // quotient is +1, 0, or -1
+  logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [`NE-1:0] calcExp;
   logic           calcSign;
   logic [Nf-1:0]  X, Dpreproc;
@@ -223,17 +223,17 @@ module otfc2 #(parameter N=52) (
   output logic [N-1:0] r
 );
 
-  // The on-the-fly converter transfers the quotient 
+  //  The on-the-fly converter transfers the quotient 
   //  bits to the quotient as they come. 
   //
-  // This code follows the psuedocode presented in the 
+  //  This code follows the pseudocode presented in the 
   //  floating point chapter of the book. Right now, 
   //  it is written for Radix-2 division.
   //
-  // QM is Q-1. It allows us to write negative bits 
+  //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
   logic [N+2:0] Q, QM, QNext, QMNext;
-  // QR and QMR are the shifted versions of Q and QM.
+  //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
   logic [N+1:0] QR, QMR;

From dd19e55b8f731736937800fe0e3d0f5fbbb11ecd Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 1 Jun 2022 16:52:21 +0000
Subject: [PATCH 2/6] unpacker optimizations

---
 addins/riscv-arch-test              |   2 +-
 pipelined/src/fpu/fma.sv            |  21 ++--
 pipelined/src/fpu/fpu.sv            |   4 +-
 pipelined/src/fpu/unpack.sv         |  19 ++--
 pipelined/src/fpu/unpackinput.sv    | 152 +++++++++++-----------------
 pipelined/testbench/testbench-fp.sv |  41 ++++----
 6 files changed, 105 insertions(+), 134 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index 307c77b26..be67c99bd 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 5d16ccc51..f160d3ea6 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -472,7 +472,7 @@ module fma2(
     // Select the result
     ///////////////////////////////////////////////////////////////////////////////
 
-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
+    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
         .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
         .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
         .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
@@ -1002,6 +1002,7 @@ module resultselect(
     input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
     input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
     input logic                     ZDenormM, // is the original precision denormalized
+    input logic 		            ZZeroM,
     input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
     input logic                     PSgnM,      // the product's sign
     input logic                     ResultSgn,  // the result's sign
@@ -1027,7 +1028,7 @@ module resultselect(
         end
         assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                     {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
         assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
         assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
         assign NormResult = {ResultSgn, ResultExp, ResultFrac};
@@ -1046,7 +1047,7 @@ module resultselect(
                                                                                                                             {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                         ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                             {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
         assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
         assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
         assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@@ -1066,7 +1067,7 @@ module resultselect(
                     
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                         {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                     InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                     NormResult = {ResultSgn, ResultExp, ResultFrac};
@@ -1082,7 +1083,7 @@ module resultselect(
                     end
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                                   {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                     UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                     InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                     NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@@ -1099,7 +1100,7 @@ module resultselect(
                     
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
                                                                                                                                   {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                     UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                     InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                     NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
@@ -1137,7 +1138,7 @@ module resultselect(
                     
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                         {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                     UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                     InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                     NormResult = {ResultSgn, ResultExp, ResultFrac};
@@ -1153,7 +1154,7 @@ module resultselect(
                     end
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
                                                                                                                                   {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                     UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                     InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                     NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
@@ -1170,7 +1171,7 @@ module resultselect(
                     
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
                                                                                                                                   {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                     UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                     InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                     NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
@@ -1188,7 +1189,7 @@ module resultselect(
                     OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
                                                                                                               {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
 
-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                     UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                     InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
                     NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 757810921..113403c95 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -95,7 +95,7 @@ module fpu (
    logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
    logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
    logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, YDenormE, ZDenormE;       // is the input denormalized
+   logic 		  XDenormE, ZDenormE;       // is the input denormalized
    logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
    logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
    logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
@@ -176,7 +176,7 @@ module fpu (
    //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
    unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
          .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
+         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
          .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
 
    // FMA
diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv
index e9f005e7a..e28b2efe1 100644
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@@ -8,26 +8,29 @@ module unpack (
     output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
     output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
     output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-    output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+    output logic                    XDenormE, ZDenormE,   // is X or Z denormalized
     output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
     output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
     output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
 );
  
     logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
-    logic           XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero
+    logic           XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
     logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
     logic           YExpMaxE, ZExpMaxE;  // is the exponent all 1s
     
     unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), 
-                            .NaN(XNaNE), .SNaN(XSNaNE), .Denorm(XDenormE), 
-                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE));
+                            .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
+                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
 
     unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), 
-                            .NaN(YNaNE), .SNaN(YSNaNE), .Denorm(YDenormE), 
-                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE));
+                            .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
+                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
 
     unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), 
-                            .NaN(ZNaNE), .SNaN(ZSNaNE), .Denorm(ZDenormE), 
-                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE));
+                            .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
+                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
+    // is the input denormalized
+    assign XDenormE = ~XExpNonZero & ~XFracZero;
+    assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv
index 9c58a444d..434a5bf00 100644
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@@ -8,25 +8,24 @@ module unpackinput (
     output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
     output logic                    NaN,    // is XYZ a NaN
     output logic                    SNaN, // is XYZ a signaling NaN
-    output logic                    Denorm,   // is XYZ denormalized
     output logic                    Zero,         // is XYZ zero
     output logic                    Inf,            // is XYZ infinity
+    output logic                    ExpNonZero,            // is the exponent not zero
+    output logic                    FracZero,            // is the fraction zero
     output logic                    ExpMax                       // does In have the maximum exponent (NaN or Inf)
 );
  
     logic [`NF-1:0] Frac; //Fraction of XYZ
-    logic           ExpNonZero; // is the exponent of XYZ non-zero
-    logic           FracZero; // is the fraction zero
     logic           ExpZero;
+    logic           BadNaNBox;
     
     if (`FPSIZES == 1) begin        // if there is only one floating point format supported
+        assign BadNaNBox = 0;
         assign Sgn = In[`FLEN-1];  // sign bit
         assign Frac = In[`NF-1:0];  // fraction (no assumed 1)
-        assign FracZero = ~|Frac; // is the fraction zero?
-        assign ExpNonZero = |Exp;  // is the exponent non-zero
-        assign Denorm = ~ExpNonZero & ~FracZero; // is the input (in its original format) denormalized
-        assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm};  // exponent.  Denormalized numbers have effective biased exponent of 1
-        assign ExpMax = &Exp;  // is the exponent all 1's
+        assign ExpNonZero = |In[`FLEN-2:`NF];  // is the exponent non-zero
+        assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};  // exponent.  Zero and denormalized inputs (exponent field all 0s) get an effective biased exponent of 1
+        assign ExpMax = &In[`FLEN-2:`NF];  // is the exponent all 1's
     end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
         //***need better names for these constants
         // largest format | smaller format
@@ -47,25 +46,16 @@ module unpackinput (
         //      quad   and half
         //      double and half
 
-        logic  [`LEN1-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
-        assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
 
         // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-        assign Sgn = FmtE ? In[`FLEN-1] : Len1[`LEN1-1];
+        assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
 
         // extract the fraction, add trailing zeroes to the mantissa if nessisary
-        assign Frac = FmtE ? In[`NF-1:0] : {Len1[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
 
-        // is the fraction zero
-        assign FracZero = ~|Frac;
-        
         // is the exponent non-zero
-        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |Len1[`LEN1-2:`NF1]; 
-
-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
+        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
 
         // example double to single conversion:
         // 1023 = 0011 1111 1111
@@ -77,12 +67,10 @@ module unpackinput (
 
         // extract the exponent, converting the smaller exponent into the larger precision if nessisary
         //      - if the original precision had a denormal number convert the exponent value 1
-        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|Denorm} : {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; 
+        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
  
-
-
         // is the exponent all 1's
-        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &Len1[`LEN1-2:`NF1];
+        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
     
 
     end else if (`FPSIZES == 3) begin       // three floating point precsions supported
@@ -104,22 +92,21 @@ module unpackinput (
         //      quad   and double and half
         //      quad   and single and half
 
-        logic  [`LEN1-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
-        logic  [`LEN2-1:0]  Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
-        assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision
-        assign Len2 = &In[`FLEN-1:`LEN2] ? In[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
-
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                `FMT:  BadNaNBox = 0;
+                `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
+                `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
+                default: BadNaNBox = 0;
+            endcase
 
         // extract the sign bit
         always_comb
             case (FmtE)
                 `FMT:  Sgn = In[`FLEN-1];
-                `FMT1: Sgn = Len1[`LEN1-1];
-                `FMT2: Sgn = Len2[`LEN2-1];
+                `FMT1: Sgn = In[`LEN1-1];
+                `FMT2: Sgn = In[`LEN2-1];
                 default: Sgn = 0;
             endcase
 
@@ -127,27 +114,20 @@ module unpackinput (
         always_comb
             case (FmtE)
                 `FMT: Frac = In[`NF-1:0];
-                `FMT1: Frac = {Len1[`NF1-1:0], (`NF-`NF1)'(0)};
-                `FMT2: Frac = {Len2[`NF2-1:0], (`NF-`NF2)'(0)};
+                `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
+                `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
                 default: Frac = 0;
             endcase
 
-        // is the fraction zero
-        assign FracZero = ~|Frac;
-
-
         // is the exponent non-zero
         always_comb
             case (FmtE)
                 `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
-                `FMT1: ExpNonZero = |Len1[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
-                `FMT2: ExpNonZero = |Len2[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
+                `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precision (`LEN1 - double or single)
+                `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precision (`LEN2 - single or half)
                 default: ExpNonZero = 0; 
             endcase
             
-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
-
         // example double to single conversion:
         // 1023 = 0011 1111 1111
         // 127  = 0000 0111 1111 (subtract this)
@@ -159,9 +139,9 @@ module unpackinput (
         // convert the larger precision's exponent to use the largest precision's bias
         always_comb 
             case (FmtE)
-                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm};
-                `FMT1: Exp = {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; 
-                `FMT2: Exp = {Len2[`LEN2-2], {`NE-`NE2{~Len2[`LEN2-2]}}, Len2[`LEN2-3:`NF2+1], Len2[`NF2]|Denorm}; 
+                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
+                `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+                `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
                 default: Exp = 0;
             endcase
 
@@ -169,8 +149,8 @@ module unpackinput (
         always_comb
             case (FmtE)
                 `FMT:  ExpMax = &In[`FLEN-2:`NF];
-                `FMT1: ExpMax = &Len1[`LEN1-2:`NF1];
-                `FMT2: ExpMax = &Len2[`LEN2-2:`NF2];
+                `FMT1: ExpMax = &In[`LEN1-2:`NF1];
+                `FMT2: ExpMax = &In[`LEN2-2:`NF2];
                 default: ExpMax = 0;
             endcase
 
@@ -184,27 +164,22 @@ module unpackinput (
         //   `Q_BIAS |  `D_BIAS |  `S_BIAS |  `H_BIAS    exponent's bias value
         //   `Q_FMT  |  `D_FMT  |  `S_FMT  |  `H_FMT     precision's format value - Q=11 D=01 S=00 H=10
 
-
-        logic  [`D_LEN-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
-        logic  [`S_LEN-1:0]  Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
-        logic  [`H_LEN-1:0]  Len3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
-        assign Len1 = &In[`Q_LEN-1:`D_LEN] ? In[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision
-        assign Len2 = &In[`Q_LEN-1:`S_LEN] ? In[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision
-        assign Len3 = &In[`Q_LEN-1:`H_LEN] ? In[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                2'b11:  BadNaNBox = 0;
+                2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
+                2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
+                2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
+            endcase
 
         // extract sign bit
         always_comb
             case (FmtE)
                 2'b11: Sgn = In[`Q_LEN-1];
-                2'b01: Sgn = Len1[`D_LEN-1];
-                2'b00: Sgn = Len2[`S_LEN-1];
-                2'b10: Sgn = Len3[`H_LEN-1];
+                2'b01: Sgn = In[`D_LEN-1];
+                2'b00: Sgn = In[`S_LEN-1];
+                2'b10: Sgn = In[`H_LEN-1];
             endcase
             
 
@@ -212,26 +187,20 @@ module unpackinput (
         always_comb
             case (FmtE)
                 2'b11: Frac = In[`Q_NF-1:0];
-                2'b01: Frac = {Len1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-                2'b00: Frac = {Len2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-                2'b10: Frac = {Len3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
+                2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
+                2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
+                2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
             endcase
 
-        // is the fraction zero
-        assign FracZero = ~|Frac;
-
         // is the exponent non-zero
         always_comb
             case (FmtE)
                 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
-                2'b01: ExpNonZero = |Len1[`D_LEN-2:`D_NF];
-                2'b00: ExpNonZero = |Len2[`S_LEN-2:`S_NF]; 
-                2'b10: ExpNonZero = |Len3[`H_LEN-2:`H_NF]; 
+                2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
+                2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
+                2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; 
             endcase
 
-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
-
 
         // example double to single conversion:
         // 1023 = 0011 1111 1111
@@ -244,10 +213,10 @@ module unpackinput (
         // convert the double precsion exponent into quad precsion
         always_comb
             case (FmtE)
-                2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|Denorm};
-                2'b01: Exp = {Len1[`D_LEN-2], {`Q_NE-`D_NE{~Len1[`D_LEN-2]}}, Len1[`D_LEN-3:`D_NF+1], Len1[`D_NF]|Denorm};
-                2'b00: Exp = {Len2[`S_LEN-2], {`Q_NE-`S_NE{~Len2[`S_LEN-2]}}, Len2[`S_LEN-3:`S_NF+1], Len2[`S_NF]|Denorm};
-                2'b10: Exp = {Len3[`H_LEN-2], {`Q_NE-`H_NE{~Len3[`H_LEN-2]}}, Len3[`H_LEN-3:`H_NF+1], Len3[`H_NF]|Denorm}; 
+                2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
+                2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
+                2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
+                2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; 
             endcase
 
 
@@ -255,19 +224,18 @@ module unpackinput (
         always_comb 
             case (FmtE)
                 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
-                2'b01: ExpMax = &Len1[`D_LEN-2:`D_NF];
-                2'b00: ExpMax = &Len2[`S_LEN-2:`S_NF];
-                2'b10: ExpMax = &Len3[`H_LEN-2:`H_NF];
+                2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
+                2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
+                2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
             endcase
 
     end
 
     // Output logic
-    assign ExpZero = ~ExpNonZero; // is the exponent all 0's?
+    assign FracZero = ~|Frac; // is the fraction zero?
     assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
-    //   ***  - force to be a NaN if it isn't properly Nan Boxed
-    assign NaN = ExpMax & ~FracZero; // is the input a NaN?
-    assign SNaN = NaN&~Frac[`NF-1]; // is the input a singnaling NaN?
+    assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN?
+    assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a signaling NaN?
     assign Inf = ExpMax & FracZero; // is the input infinity?
-    assign Zero = ExpZero & FracZero; // is the input zero?
+    assign Zero = ~ExpNonZero & FracZero; // is the input zero?
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 892e76373..b46061a03 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -79,7 +79,6 @@ module testbenchfp;
   logic [`NF:0]         FmaRuXMan, FmaRuYMan, FmaRuZMan;
   logic [`NF:0]         FmaRdXMan, FmaRdYMan, FmaRdZMan;
   logic [`NF:0]         FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
-  logic                 XNorm;                                // is X normal
   logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
   logic                 FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
   logic                 FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
@@ -92,12 +91,12 @@ module testbenchfp;
   logic                 FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
   logic                 FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
   logic                 FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
-  logic                 XDenorm, YDenorm, ZDenorm;            // is the input denormalized
-  logic                 FmaRneXDenorm, FmaRneYDenorm, FmaRneZDenorm;
-  logic                 FmaRzXDenorm, FmaRzYDenorm, FmaRzZDenorm;
-  logic                 FmaRuXDenorm, FmaRuYDenorm, FmaRuZDenorm;
-  logic                 FmaRdXDenorm, FmaRdYDenorm, FmaRdZDenorm;
-  logic                 FmaRnmXDenorm, FmaRnmYDenorm, FmaRnmZDenorm;
+  logic                 XDenorm, ZDenorm;            // is the input denormalized
+  logic                 FmaRneXDenorm, FmaRneZDenorm;
+  logic                 FmaRzXDenorm, FmaRzZDenorm;
+  logic                 FmaRuXDenorm, FmaRuZDenorm;
+  logic                 FmaRdXDenorm, FmaRdZDenorm;
+  logic                 FmaRnmXDenorm, FmaRnmZDenorm;
   logic                 XInf, YInf, ZInf;                   // is the input infinity
   logic                 FmaRneXInf, FmaRneYInf, FmaRneZInf;
   logic                 FmaRzXInf, FmaRzYInf, FmaRzZInf;
@@ -683,7 +682,7 @@ module testbenchfp;
                                     .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
                                     .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
                                     .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
-                                    .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), 
+                                    .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), 
                                     .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
                                     .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
                                     .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
@@ -693,7 +692,7 @@ module testbenchfp;
                                     .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
                                     .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
                                     .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
-                                    .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), 
+                                    .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), 
                                     .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
                                     .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
                                     .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
@@ -703,7 +702,7 @@ module testbenchfp;
                                     .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
                                     .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
                                     .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
-                                    .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), 
+                                    .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), 
                                     .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
                                     .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
                                     .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
@@ -713,7 +712,7 @@ module testbenchfp;
                                     .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
                                     .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
                                     .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
-                                    .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), 
+                                    .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), 
                                     .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
                                     .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
                                     .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
@@ -723,7 +722,7 @@ module testbenchfp;
                                     .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
                                     .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
                                     .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
-                                    .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), 
+                                    .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), 
                                     .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
                                     .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
                                     .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
@@ -733,9 +732,9 @@ module testbenchfp;
                                     .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
                                     .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                     .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
-                                    .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), 
+                                    .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
                                     .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf),.XNormE(XNorm), .XExpMaxE(XExpMax),
+                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax),
                                     .X, .Y, .Z);
 
 
@@ -1294,13 +1293,13 @@ module readfmavectors (
   output logic [`NF:0]        XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
   output logic                XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
   output logic                XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                XDenormE, ZDenormE,   // is X or Z denormalized
   output logic                XZeroE, YZeroE, ZZeroE,         // is XYZ zero
   output logic                XInfE, YInfE, ZInfE,            // is XYZ infinity
   output logic [`FLEN-1:0]    X, Y, Z                 // inputs
 );
 
-  logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA
+  logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
   // apply test vectors on rising edge of clk
   // Format of vectors Inputs(1/2/3)_AnsFlg
   always @(posedge clk) begin
@@ -1335,7 +1334,7 @@ module readfmavectors (
   end
   
   unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
                 .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                 .XExpMaxE, .ZDenormE);
 endmodule
@@ -1373,10 +1372,10 @@ module readvectors (
   output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
   output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
   output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                    XDenormE, ZDenormE,   // is X or Z denormalized
   output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
   output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XNormE, XExpMaxE,
+  output logic XExpMaxE,
   output logic [`FLEN-1:0] X, Y, Z
 );
 
@@ -1660,7 +1659,7 @@ module readvectors (
   end
   
   unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                 .XExpMaxE);
 endmodule
\ No newline at end of file

From ee7070c5b6cda39d0ffbe07b7da4ee6aebbb1aef Mon Sep 17 00:00:00 2001
From: DTowersM <dtowersm@gmail.com>
Date: Wed, 1 Jun 2022 17:19:19 +0000
Subject: [PATCH 3/6] some changes to further support vsim on embench

---
 benchmarks/embench/Makefile | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile
index 0cb54e1a1..4a4875dfa 100644
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@@ -6,14 +6,23 @@ all: build sim
 
 allClean: clean all
 
-build:
-	../../addins/embench-iot/build_all.py -v --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
+build: buildspeed buildsize
+
+buildspeed:
+	../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
 	find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
-	../../addins/embench-iot/build_all.py -v --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"
 
-sim: modelSimBuild size speed
+buildsize:
+	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"
 
-modelSimBuild: objdump
+sim: modelSimBuild speed
+
+# vsim:
+# 	cd ../../pipelined/regression/
+# 	vsim -c -do "do wally-pipelined-batch.do rv32gc embench"
+# 	cd ../../benchmarks/embench/
+
+modelSimBuild: buildspeed objdump
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
 
@@ -23,13 +32,16 @@ size:
 speed:
 	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
 
-objdump:
+objdump: buildspeed
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
 
 clean: 
 	rm -rf ../../addins/embench-iot/bd_speed/
 	rm -rf ../../addins/embench-iot/bd_size/
 
+allclean: clean
+	rm -rf ../../addins/embench-iot/logs/
+
 # std:
 # 	../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" 
 # 	riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump

From d28b4cf6027a8c39b5c5ef05086fca7834af41b8 Mon Sep 17 00:00:00 2001
From: DTowersM <dtowersm@gmail.com>
Date: Wed, 1 Jun 2022 21:00:44 +0000
Subject: [PATCH 4/6] added support for embench post processing to testbench.sv

---
 pipelined/testbench/testbench.sv | 138 ++++++++++++++++++-------------
 1 file changed, 79 insertions(+), 59 deletions(-)

diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index ba3122926..0ebab1cf9 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -128,7 +128,8 @@ logic [3:0] dummy;
     end
   end
 
-  string signame, memfilename, pathname, objdumpfilename, adrstr;
+  string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
+  integer outputFilePointer;
 
   logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
   logic UARTSin, UARTSout;
@@ -213,70 +214,88 @@ logic [3:0] dummy;
           $display("Benchmark: coremark is done.");
           $stop;
         end
+      // Termination condition (i.e. we finished running current test) 
       if (DCacheFlushDone) begin
- 
-        #600; // give time for instructions in pipeline to finish
-        // clear signature to prevent contamination from previous tests
-        for(i=0; i<SIGNATURESIZE; i=i+1) begin
-          sig32[i] = 'bx;
-        end
-
-        // read signature, reformat in 64 bits if necessary
-        signame = {pathname, tests[test], ".signature.output"};
-        $readmemh(signame, sig32);
-        i = 0;
-        while (i < SIGNATURESIZE) begin
-          if (`XLEN == 32) begin
-            signature[i] = sig32[i];
-            i = i+1;
-          end else begin
-            signature[i/2] = {sig32[i+1], sig32[i]};
-            i = i + 2;
-          end
-          if (i >= 4 & sig32[i-4] === 'bx) begin
-            if (i == 4) begin
-              i = SIGNATURESIZE+1; // flag empty file
-              $display("  Error: empty test file");
-            end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
-          end
-        end
-
-        // Check errors
-        errors = (i == SIGNATURESIZE+1); // error if file is empty
-        i = 0;
+        // Gets the memory location of begin_signature
         testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
         testadrNoBase = (tests[test+1].atohex())/(`XLEN/8);
-        /* verilator lint_off INFINITELOOP */
-        while (signature[i] !== 'bx) begin
-          logic [`XLEN-1:0] sig;
-          if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
-          else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
-          //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
-          if (signature[i] !== sig &
-          //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
-	      (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
-            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
-              // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
-              // report errors unless they are garbage at the end of the sim
-              // kind of hacky test for garbage right now
-              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
-              errors = errors+1;
-              $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
-                    tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
-                    //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
-              $stop;//***debug
+        #600; // give time for instructions in pipeline to finish
+        if (TEST == "embench") begin
+          // Writes contents of begin_signature to .sim.output file
+          // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score
+          // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking
+          $display("Embench Benchmark: %s is done.", tests[test]);
+          outputfile = {pathname, tests[test], ".sim.output"};
+          outputFilePointer = $fopen(outputfile);
+          i = 0;
+          while ($unsigned(i) < $unsigned(5'd5)) begin
+            $fdisplayh(outputFilePointer, DCacheFlushFSM.ShadowRAM[testadr+i]);
+            i = i + 1;
+          end
+          $fclose(outputFilePointer);
+          $display("Embench Benchmark: created output file: %s", outputfile);
+        end else begin 
+          // for tests with no self checking mechanism, read .signature.output file and compare to check for errors
+          // clear signature to prevent contamination from previous tests
+          for(i=0; i<SIGNATURESIZE; i=i+1) begin
+            sig32[i] = 'bx;
+          end
+          // read signature, reformat in 64 bits if necessary
+          signame = {pathname, tests[test], ".signature.output"};
+          $readmemh(signame, sig32);
+          i = 0;
+          while (i < SIGNATURESIZE) begin
+            if (`XLEN == 32) begin
+              signature[i] = sig32[i];
+              i = i+1;
+            end else begin
+              signature[i/2] = {sig32[i+1], sig32[i]};
+              i = i + 2;
+            end
+            if (i >= 4 & sig32[i-4] === 'bx) begin
+              if (i == 4) begin
+                i = SIGNATURESIZE+1; // flag empty file
+                $display("  Error: empty test file");
+              end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
             end
           end
-          i = i + 1;
-        end
-        /* verilator lint_on INFINITELOOP */
-        if (errors == 0) begin
-          $display("%s succeeded.  Brilliant!!!", tests[test]);
-        end
-        else begin
-          $display("%s failed with %d errors. :(", tests[test], errors);
-          totalerrors = totalerrors+1;
+
+          // Check errors
+          errors = (i == SIGNATURESIZE+1); // error if file is empty
+          i = 0;
+          /* verilator lint_off INFINITELOOP */
+          while (signature[i] !== 'bx) begin
+            logic [`XLEN-1:0] sig;
+            if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
+            else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
+            //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
+            if (signature[i] !== sig &
+            //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
+            (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
+              if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
+                // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
+                // report errors unless they are garbage at the end of the sim
+                // kind of hacky test for garbage right now
+                $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
+                errors = errors+1;
+                $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
+                      tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
+                      //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
+                $stop;//***debug
+              end
+            end
+            i = i + 1;
+          end
+          /* verilator lint_on INFINITELOOP */
+          if (errors == 0) begin
+            $display("%s succeeded.  Brilliant!!!", tests[test]);
+          end
+          else begin
+            $display("%s failed with %d errors. :(", tests[test], errors);
+            totalerrors = totalerrors+1;
+          end
         end
+        // move on to the next test, check to see if we're done
         test = test + 2;
         if (test == tests.size()) begin
           if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
@@ -284,6 +303,7 @@ logic [3:0] dummy;
           $stop;
         end
         else begin
+            // If there are still additional tests to run, read in information for the next test
             //pathname = tvpaths[tests[0]];
             memfilename = {pathname, tests[test], ".elf.memfile"};
             //$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);

From bed4fad7b26d903a57e29f3f6eea1a712d52da6c Mon Sep 17 00:00:00 2001
From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com>
Date: Wed, 1 Jun 2022 21:02:49 +0000
Subject: [PATCH 5/6] fixed errors in synth.out by switching ( to {

---
 synthDC/scripts/synth.tcl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index 9d8395a4a..fb796b796 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -71,7 +71,9 @@ if { $saifpower == 1 } {
 }
 
 # Set reset false path
-set_false_path -from [get_ports reset]
+if {$drive != "INV"} {
+    set_false_path -from [get_ports reset]
+}
 
 # Set Frequency in [MHz] or period in [ns]
 set my_clock_pin clk
@@ -112,13 +114,13 @@ set all_in_ex_clk [remove_from_collection [all_inputs] [get_ports $my_clk]]
 if {$tech == "sky130"} {
     set_driving_cell  -lib_cell sky130_osu_sc_12T_ms__dff_1 -pin Q $all_in_ex_clk
 } elseif {$tech == "sky90"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_driving_cell -lib_cell scc9gena_inv_1 -pin Y $all_in_ex_clk
     } else {
 	set_driving_cell  -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk
     }
 } elseif {$tech == "tsmc28"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk
     }
 }
@@ -131,13 +133,13 @@ set_output_delay 0.0 -max -clock $my_clk [all_outputs]
 if {$tech == "sky130"} {
     set_load [expr [load_of sky130_osu_sc_12T_ms_TT_1P8_25C.ccs/sky130_osu_sc_12T_ms__dff_1/D] * 1] [all_outputs]
 } elseif {$tech == "sky90"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_inv_4/A] * 1] [all_outputs]
     } else {
         set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs]
     }
 } elseif {$tech == "tsmc28"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs]
     }
 }

From e42afbfb307859143a43a228befeda00faed4709 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 1 Jun 2022 23:34:29 +0000
Subject: [PATCH 6/6] parameterized some small fma units

---
 pipelined/config/rv64fp/wally-config.vh |  4 ++--
 pipelined/src/fpu/fclassify.sv          |  4 ++--
 pipelined/src/fpu/fctrl.sv              | 20 ++++++++++++++--
 pipelined/src/fpu/fpu.sv                |  4 ++--
 pipelined/src/fpu/fregfile.sv           |  6 ++---
 pipelined/src/fpu/fsgninj.sv            | 32 +++++++++++++++++++++----
 pipelined/src/generic/lzc.sv            |  2 +-
 synthDC/scripts/synth.tcl               | 20 ++++++----------
 8 files changed, 63 insertions(+), 29 deletions(-)

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index 36cda4d91..e88b012aa 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -39,12 +39,12 @@
 
 // MISA RISC-V configuration per specification
 //16 - quad 3 - double 5 - single
-`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
+`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 0 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
 `define ZICOUNTERS_SUPPORTED 1
-`define ZFH_SUPPORTED 1
+`define ZFH_SUPPORTED 0
 
 /// Microarchitectural Features
 `define UARCH_PIPELINED 1
diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv
index 05a91d212..a1a934ffe 100644
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@@ -8,7 +8,7 @@ module fclassify (
     input logic         XDenormE, // is denormal
     input logic         XZeroE, // is zero
     input logic         XInfE,  // is infinity
-    output logic [63:0] ClassResE // classify result
+    output logic [`XLEN-1:0] ClassResE // classify result
     );
 
     logic PInf, PZero, PNorm, PDenorm;
@@ -37,6 +37,6 @@ module fclassify (
     //  bit 7 - +Inf
     //  bit 8 - signaling NaN
     //  bit 9 - quiet NaN
-    assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
+    assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
 
 endmodule
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 0640a544e..690f9f940 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -1,3 +1,4 @@
+`include "wally-config.vh"
 
 module fctrl (
   input  logic [6:0] Funct7D,   // bits 31:25 of instruction - may contain percision
@@ -13,7 +14,7 @@ module fctrl (
   output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
   output logic [1:0] FResSelD,    // select one of the results done in the memory stage
   output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
-  output logic       FmtD,        // precision - single-0 double-1
+  output logic [`FPSIZES/3:0] FmtD,        // precision - single-0 double-1
   output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
   output logic       FWriteIntD   // is the result written to the integer register
   );
@@ -119,8 +120,23 @@ module fctrl (
   // Precision
   //    0-single
   //    1-double
-  assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
+  
+    if (`FPSIZES == 1)begin
+      logic [1:0] FmtTmp;
+      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = `FMT == FmtTmp;
+end
+      //assign FmtD = 0; *** change back after full parameterization
 
+    else if (`FPSIZES == 2)begin
+      logic [1:0] FmtTmp;
+      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = `FMT == FmtTmp;
+    end
+    else if (`FPSIZES == 3|`FPSIZES == 4)
+      assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+
+      // assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
   // FResultSel:
   //    000 - ReadRes - load
   //    001 - FMARes  - FMA and multiply
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 113403c95..0fa125b80 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -115,7 +115,7 @@ module fpu (
    logic [63:0] 	  CvtResE;                   // FP <-> int convert result
    logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
    logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
-   logic [63:0] 	  ClassResE;               // classify result
+   logic [`XLEN-1:0] 	  ClassResE;               // classify result
    logic [63:0] 	  CmpResE;                   // compare result
    logic 		  CmpNVE;                     // compare invalid flag (Not Valid)     
    logic [63:0] 	  SgnResE;                   // sign injection result
@@ -231,7 +231,7 @@ module fpu (
    mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
 
    // select the result that may be written to the integer register - to IEU
-   mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], 
+   mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, 
                CvtIntResE, FIntResSelE, FIntResE);
    // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
    // *** make sure the fpu matches the chapter diagram
diff --git a/pipelined/src/fpu/fregfile.sv b/pipelined/src/fpu/fregfile.sv
index 2d54038de..00c89ff56 100644
--- a/pipelined/src/fpu/fregfile.sv
+++ b/pipelined/src/fpu/fregfile.sv
@@ -33,10 +33,10 @@ module fregfile (
   input logic 	      clk, reset,
   input logic 	      we4, 
   input logic [4:0]   a1, a2, a3, a4, 
-  input logic [63:0]  wd4,
-  output logic [63:0] rd1, rd2, rd3);
+  input logic [`FLEN-1:0]  wd4,
+  output logic [`FLEN-1:0] rd1, rd2, rd3);
    
-   logic [63:0]       rf[31:0];
+   logic [`FLEN-1:0]       rf[31:0];
    integer 	      i;
    
    // three ported register file
diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv
index 8474fdff6..00c1372c2 100755
--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@@ -26,13 +26,14 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+`include "wally-config.vh"
 
 module fsgninj (  
 	input logic        	XSgnE, YSgnE,	// X and Y sign bits
-	input logic [63:0] 	FSrcXE,			// X
-	input logic 		FmtE,			// precision 1 = double 0 = single
+	input logic [`FLEN-1:0] 	FSrcXE,			// X
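+	input logic [`FPSIZES/3:0]		FmtE,			// precision select: 1 bit when FPSIZES <= 2 (1 = full FLEN-wide format, 0 = narrower LEN1 format); 2-bit format code when FPSIZES is 3 or 4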
 	input  logic [1:0]  SgnOpCodeE,		// operation control
-	output logic [63:0] SgnResE			// result
+	output logic [`FLEN-1:0] SgnResE			// result
 );
 
 	logic ResSgn;
@@ -50,7 +51,30 @@ module fsgninj (
 	// format final result based on precision
 	//    - uses NaN-blocking format
 	//        - if there are any unsused bits the most significant bits are filled with 1s
-	assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]};
+	
+    if (`FPSIZES == 1)
+		assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+
+    else if (`FPSIZES == 2)
+		assign SgnResE = FmtE ? {ResSgn, FSrcXE[`FLEN-2:0]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
+
+    else if (`FPSIZES == 3)
+        always_comb
+            case (FmtE)
+                `FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+                `FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
+                `FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]};
+                default: SgnResE = 0;
+            endcase
+
+    else if (`FPSIZES == 4)
+        always_comb
+            case (FmtE)
+                2'h3: SgnResE = {ResSgn, FSrcXE[`Q_LEN-2:0]};
+                2'h1: SgnResE = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, FSrcXE[`D_LEN-2:0]};
+                2'h0: SgnResE = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, FSrcXE[`S_LEN-2:0]};
+                2'h2: SgnResE = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, FSrcXE[`H_LEN-2:0]};
+            endcase
 
 
 endmodule
diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv
index 1ce082475..123edcb6e 100644
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@@ -1,5 +1,5 @@
 //leading zero counter i.e. priority encoder
-module lzc #(parameter WIDTH=1) (
+module lzc #(parameter WIDTH = 1) (
     input logic  [WIDTH-1:0]            num,
     output logic [$clog2(WIDTH+1)-1:0]  ZeroCnt
 );
diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index fb796b796..3d7d9825c 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -325,21 +325,15 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
 
 set filename [format "%s%s%s%s" $outputDir  "/reports/" $my_toplevel "_fpu_timing.rep"]
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/*} -nworst 1 }
+redirect -append $filename { echo "\n\n\n//// Critical paths through fma1 ////\n\n\n" }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma1/*} -nworst 1 }
+redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fdivsqrt/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through faddcvt ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.faddcvt/*} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FMAResM ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FMAResM} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FDivResM ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FDivResM} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FResE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FResE} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through fma/SumE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/SumE} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through fma/ProdExpE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/ProdExpE} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {faddcvt/*} -nworst 1 }
 
 set filename [format "%s%s%s%s" $outputDir  "/reports/" $my_toplevel "_mmu_timing.rep"]
 redirect -append $filename { echo "\n\n\n//// Critical paths through immu/physicaladdress ////\n\n\n" }