Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2022-06-02 02:52:03 +00:00 · 2022-06-02 02:52:03 +00:00 · c16c5beef5
commit c16c5beef5
parent 65961223f8 e42afbfb30
15 changed files with 277 additions and 238 deletions
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@ -6,14 +6,23 @@ all: build sim

 allClean: clean all

-build:
-	../../addins/embench-iot/build_all.py -v --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
+build: buildspeed buildsize  # aggregate target: runs both the bd_speed and bd_size builds
+
+buildspeed:  # run build_all.py into bd_speed, then copy every file under bd_speed with no '.' in its name to <file>.elf
+	../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
 	find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
-	../../addins/embench-iot/build_all.py -v --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"

-sim: modelSimBuild size speed
+buildsize:  # run build_all.py into bd_size with -nostdlib, the dummy.S startup file and dummy libs; no .elf copies are made here
+	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"

-modelSimBuild: objdump
+sim: modelSimBuild speed  # prepare memfiles, then run the speed benchmark; NOTE(review): speed declares no prerequisite on modelSimBuild, so this order only holds without -j
+
+# vsim:
+# 	cd ../../pipelined/regression/
+# 	vsim -c -do "do wally-pipelined-batch.do rv32gc embench"
+# 	cd ../../benchmarks/embench/
+
+modelSimBuild: buildspeed objdump  # after build + disassembly: elf2hex each bd_speed .elf into a 32-bit-wide .memfile, then run extractFunctionRadix.sh on each .elf.objdump
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done

@ -23,13 +32,16 @@ size:
 speed:
 	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1

-objdump:
+objdump: buildspeed  # writes <elf>.objdump (objdump -S -D output) for every .elf under bd_speed
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done

 clean: 
 	rm -rf ../../addins/embench-iot/bd_speed/
 	rm -rf ../../addins/embench-iot/bd_size/

+allclean: clean  # clean, plus removal of the embench-iot logs directory; distinct from allClean (clean, then all)
+	rm -rf ../../addins/embench-iot/logs/
+
 # std:
 # 	../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" 
 # 	riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@ -39,12 +39,12 @@

 // MISA RISC-V configuration per specification
 //16 - quad 3 - double 5 - single
-`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
+`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 0 << 16 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
 `define ZICOUNTERS_SUPPORTED 1
-`define ZFH_SUPPORTED 1
+`define ZFH_SUPPORTED 0

 /// Microarchitectural Features
 `define UARCH_PIPELINED 1
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@ -8,7 +8,7 @@ module fclassify (
    input logic         XDenormE, // is denormal
    input logic         XZeroE, // is zero
    input logic         XInfE,  // is infinity
-    output logic [63:0] ClassResE // classify result
+    output logic [`XLEN-1:0] ClassResE // classify result
    );

    logic PInf, PZero, PNorm, PDenorm;
@ -37,6 +37,6 @@ module fclassify (
    //  bit 7 - +Inf
    //  bit 8 - signaling NaN
    //  bit 9 - quiet NaN
-    assign ClassResE = {{54{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
+    assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};

 endmodule
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -1,3 +1,4 @@
+`include "wally-config.vh"

 module fctrl (
  input  logic [6:0] Funct7D,   // bits 31:25 of instruction - may contain percision
@ -13,7 +14,7 @@ module fctrl (
  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
-  output logic       FmtD,        // precision - single-0 double-1
+  output logic [`FPSIZES/3:0] FmtD,        // precision - single-0 double-1
  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
  output logic       FWriteIntD   // is the result written to the integer register
  );
@ -119,8 +120,23 @@ module fctrl (
  // Precision
  //    0-single
  //    1-double
-  assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
+  
+    if (`FPSIZES == 1)begin
+      logic [1:0] FmtTmp;
+      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = `FMT == FmtTmp;
+end
+      //assign FmtD = 0; *** change back after full parameterization

+    else if (`FPSIZES == 2)begin
+      logic [1:0] FmtTmp;
+      assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = `FMT == FmtTmp;
+    end
+    else if (`FPSIZES == 3|`FPSIZES == 4)
+      assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+
+      // assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
  // FResultSel:
  //    000 - ReadRes - load
  //    001 - FMARes  - FMA and multiply
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -472,7 +472,7 @@ module fma2(
    // Select the result
    ///////////////////////////////////////////////////////////////////////////////

-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
+    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
@ -1002,6 +1002,7 @@ module resultselect(
    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                     ZDenormM, // is the original precision denormalized
+    input logic 		            ZZeroM,
    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
    input logic                     PSgnM,      // the product's sign
    input logic                     ResultSgn,  // the result's sign
@ -1027,7 +1028,7 @@ module resultselect(
        end
        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1046,7 +1047,7 @@ module resultselect(
                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1066,7 +1067,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1082,7 +1083,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1099,7 +1100,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
@ -1137,7 +1138,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1153,7 +1154,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
@ -1170,7 +1171,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
@ -1188,7 +1189,7 @@ module resultselect(
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      

-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -95,7 +95,7 @@ module fpu (
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, YDenormE, ZDenormE;       // is the input denormalized
+   logic 		  XDenormE, ZDenormE;       // is the input denormalized
   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
@ -115,7 +115,7 @@ module fpu (
   logic [63:0] 	  CvtResE;                   // FP <-> int convert result
   logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
   logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
-   logic [63:0] 	  ClassResE;               // classify result
+   logic [`XLEN-1:0] 	  ClassResE;               // classify result
   logic [63:0] 	  CmpResE;                   // compare result
   logic 		  CmpNVE;                     // compare invalid flag (Not Valid)     
   logic [63:0] 	  SgnResE;                   // sign injection result
@ -176,7 +176,7 @@ module fpu (
   //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
+         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);

   // FMA
@ -231,7 +231,7 @@ module fpu (
   mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);

   // select the result that may be written to the integer register - to IEU
-   mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], 
+   mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE, 
               CvtIntResE, FIntResSelE, FIntResE);
   // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
   // *** make sure the fpu matches the chapter diagram
--- a/pipelined/src/fpu/fregfile.sv
+++ b/pipelined/src/fpu/fregfile.sv
@ -33,10 +33,10 @@ module fregfile (
  input logic 	      clk, reset,
  input logic 	      we4, 
  input logic [4:0]   a1, a2, a3, a4, 
-  input logic [63:0]  wd4,
-  output logic [63:0] rd1, rd2, rd3);
+  input logic [`FLEN-1:0]  wd4,
+  output logic [`FLEN-1:0] rd1, rd2, rd3);
   
-   logic [63:0]       rf[31:0];
+   logic [`FLEN-1:0]       rf[31:0];
   integer 	      i;
   
   // three ported register file
--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@ -26,13 +26,14 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+`include "wally-config.vh"

 module fsgninj (  
 	input logic        	XSgnE, YSgnE,	// X and Y sign bits
-	input logic [63:0] 	FSrcXE,			// X
-	input logic 		FmtE,			// precision 1 = double 0 = single
+	input logic [`FLEN-1:0] 	FSrcXE,			// X
+	input logic [`FPSIZES/3:0]		FmtE,			// precision 1 = double 0 = single
 	input  logic [1:0]  SgnOpCodeE,		// operation control
-	output logic [63:0] SgnResE			// result
+	output logic [`FLEN-1:0] SgnResE			// result
 );

 	logic ResSgn;
@ -50,7 +51,30 @@ module fsgninj (
 	// format final result based on precision
 	//    - uses NaN-blocking format
 	//        - if there are any unsused bits the most significant bits are filled with 1s
-	assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]};
+	
+    if (`FPSIZES == 1)
+		assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+
+    else if (`FPSIZES == 2)
+		assign SgnResE = FmtE ? {ResSgn, FSrcXE[`FLEN-2:0]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
+
+    else if (`FPSIZES == 3)
+        always_comb
+            case (FmtE)
+                `FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+                `FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
+                `FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]};
+                default: SgnResE = 0;
+            endcase
+
+    else if (`FPSIZES == 4)
+        always_comb
+            case (FmtE)
+                2'h3: SgnResE = {ResSgn, FSrcXE[`Q_LEN-2:0]};
+                2'h1: SgnResE = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, FSrcXE[`D_LEN-2:0]};
+                2'h0: SgnResE = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, FSrcXE[`S_LEN-2:0]};
+                2'h2: SgnResE = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, FSrcXE[`H_LEN-2:0]};
+            endcase


 endmodule
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -8,26 +8,29 @@ module unpack (
    output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
    output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
    output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-    output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+    output logic                    XDenormE, ZDenormE,   // is X or Z denormalized
    output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
    output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
    output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
 );
 
    logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
-    logic           XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero
+    logic           XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
    logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
    logic           YExpMaxE, ZExpMaxE;  // is the exponent all 1s
    
    unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), 
-                            .NaN(XNaNE), .SNaN(XSNaNE), .Denorm(XDenormE), 
-                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE));
+                            .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
+                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));

    unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), 
-                            .NaN(YNaNE), .SNaN(YSNaNE), .Denorm(YDenormE), 
-                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE));
+                            .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
+                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));

    unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), 
-                            .NaN(ZNaNE), .SNaN(ZSNaNE), .Denorm(ZDenormE), 
-                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE));
+                            .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
+                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
+    // is the input denormalized
+    assign XDenormE = ~XExpNonZero & ~XFracZero;
+    assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
 endmodule
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@ -8,25 +8,24 @@ module unpackinput (
    output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
    output logic                    NaN,    // is XYZ a NaN
    output logic                    SNaN, // is XYZ a signaling NaN
-    output logic                    Denorm,   // is XYZ denormalized
    output logic                    Zero,         // is XYZ zero
    output logic                    Inf,            // is XYZ infinity
+    output logic                    ExpNonZero,            // is the exponent not zero
+    output logic                    FracZero,            // is the fraction zero
    output logic                    ExpMax                       // does In have the maximum exponent (NaN or Inf)
 );
 
    logic [`NF-1:0] Frac; //Fraction of XYZ
-    logic           ExpNonZero; // is the exponent of XYZ non-zero
-    logic           FracZero; // is the fraction zero
    logic           ExpZero;
+    logic           BadNaNBox;
    
    if (`FPSIZES == 1) begin        // if there is only one floating point format supported
+        assign BadNaNBox = 0;
        assign Sgn = In[`FLEN-1];  // sign bit
        assign Frac = In[`NF-1:0];  // fraction (no assumed 1)
-        assign FracZero = ~|Frac; // is the fraction zero?
-        assign ExpNonZero = |Exp;  // is the exponent non-zero
-        assign Denorm = ~ExpNonZero & ~FracZero; // is the input (in its original format) denormalized
-        assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm};  // exponent.  Denormalized numbers have effective biased exponent of 1
-        assign ExpMax = &Exp;  // is the exponent all 1's
+        assign ExpNonZero = |In[`FLEN-2:`NF];  // is the exponent non-zero
+        assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};  // exponent.  Denormalized numbers have effective biased exponent of 1
+        assign ExpMax = &In[`FLEN-2:`NF];  // is the exponent all 1's
    end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
        //***need better names for these constants
        // largest format | smaller format
@ -47,25 +46,16 @@ module unpackinput (
        //      quad   and half
        //      double and half

-        logic  [`LEN1-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
-        assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing

        // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-        assign Sgn = FmtE ? In[`FLEN-1] : Len1[`LEN1-1];
+        assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];

        // extract the fraction, add trailing zeroes to the mantissa if nessisary
-        assign Frac = FmtE ? In[`NF-1:0] : {Len1[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};

-        // is the fraction zero
-        assign FracZero = ~|Frac;
-        
        // is the exponent non-zero
-        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |Len1[`LEN1-2:`NF1]; 
-
-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
+        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 

        // example double to single conversion:
        // 1023 = 0011 1111 1111
@ -77,12 +67,10 @@ module unpackinput (

        // extract the exponent, converting the smaller exponent into the larger precision if nessisary
        //      - if the original precision had a denormal number convert the exponent value 1
-        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|Denorm} : {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; 
+        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
 
-
-
        // is the exponent all 1's
-        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &Len1[`LEN1-2:`NF1];
+        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
    

    end else if (`FPSIZES == 3) begin       // three floating point precsions supported
@ -104,22 +92,21 @@ module unpackinput (
        //      quad   and double and half
        //      quad   and single and half

-        logic  [`LEN1-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
-        logic  [`LEN2-1:0]  Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
-        assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision
-        assign Len2 = &In[`FLEN-1:`LEN2] ? In[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
-
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                `FMT:  BadNaNBox = 0;
+                `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
+                `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
+                default: BadNaNBox = 0;
+            endcase

        // extract the sign bit
        always_comb
            case (FmtE)
                `FMT:  Sgn = In[`FLEN-1];
-                `FMT1: Sgn = Len1[`LEN1-1];
-                `FMT2: Sgn = Len2[`LEN2-1];
+                `FMT1: Sgn = In[`LEN1-1];
+                `FMT2: Sgn = In[`LEN2-1];
                default: Sgn = 0;
            endcase

@ -127,27 +114,20 @@ module unpackinput (
        always_comb
            case (FmtE)
                `FMT: Frac = In[`NF-1:0];
-                `FMT1: Frac = {Len1[`NF1-1:0], (`NF-`NF1)'(0)};
-                `FMT2: Frac = {Len2[`NF2-1:0], (`NF-`NF2)'(0)};
+                `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
+                `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
                default: Frac = 0;
            endcase

-        // is the fraction zero
-        assign FracZero = ~|Frac;
-
-
        // is the exponent non-zero
        always_comb
            case (FmtE)
                `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
-                `FMT1: ExpNonZero = |Len1[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
-                `FMT2: ExpNonZero = |Len2[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
+                `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precision (`LEN1 - double or single)
+                `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precision (`LEN2 - single or half)
                default: ExpNonZero = 0; 
            endcase
            
-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
-
        // example double to single conversion:
        // 1023 = 0011 1111 1111
        // 127  = 0000 0111 1111 (subtract this)
@ -159,9 +139,9 @@ module unpackinput (
        // convert the larger precision's exponent to use the largest precision's bias
        always_comb 
            case (FmtE)
-                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm};
-                `FMT1: Exp = {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; 
-                `FMT2: Exp = {Len2[`LEN2-2], {`NE-`NE2{~Len2[`LEN2-2]}}, Len2[`LEN2-3:`NF2+1], Len2[`NF2]|Denorm}; 
+                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
+                `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+                `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
                default: Exp = 0;
            endcase

@ -169,8 +149,8 @@ module unpackinput (
        always_comb
            case (FmtE)
                `FMT:  ExpMax = &In[`FLEN-2:`NF];
-                `FMT1: ExpMax = &Len1[`LEN1-2:`NF1];
-                `FMT2: ExpMax = &Len2[`LEN2-2:`NF2];
+                `FMT1: ExpMax = &In[`LEN1-2:`NF1];
+                `FMT2: ExpMax = &In[`LEN2-2:`NF2];
                default: ExpMax = 0;
            endcase

@ -184,27 +164,22 @@ module unpackinput (
        //   `Q_BIAS |  `D_BIAS |  `S_BIAS |  `H_BIAS    exponent's bias value
        //   `Q_FMT  |  `D_FMT  |  `S_FMT  |  `H_FMT     precision's format value - Q=11 D=01 S=00 H=10

-
-        logic  [`D_LEN-1:0]  Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
-        logic  [`S_LEN-1:0]  Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
-        logic  [`H_LEN-1:0]  Len3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
-        assign Len1 = &In[`Q_LEN-1:`D_LEN] ? In[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision
-        assign Len2 = &In[`Q_LEN-1:`S_LEN] ? In[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision
-        assign Len3 = &In[`Q_LEN-1:`H_LEN] ? In[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                2'b11:  BadNaNBox = 0;
+                2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
+                2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
+                2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
+            endcase

        // extract sign bit
        always_comb
            case (FmtE)
                2'b11: Sgn = In[`Q_LEN-1];
-                2'b01: Sgn = Len1[`D_LEN-1];
-                2'b00: Sgn = Len2[`S_LEN-1];
-                2'b10: Sgn = Len3[`H_LEN-1];
+                2'b01: Sgn = In[`D_LEN-1];
+                2'b00: Sgn = In[`S_LEN-1];
+                2'b10: Sgn = In[`H_LEN-1];
            endcase
            

@ -212,26 +187,20 @@ module unpackinput (
        always_comb
            case (FmtE)
                2'b11: Frac = In[`Q_NF-1:0];
-                2'b01: Frac = {Len1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-                2'b00: Frac = {Len2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-                2'b10: Frac = {Len3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
+                2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
+                2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
+                2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
            endcase

-        // is the fraction zero
-        assign FracZero = ~|Frac;
-
        // is the exponent non-zero
        always_comb
            case (FmtE)
                2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
-                2'b01: ExpNonZero = |Len1[`D_LEN-2:`D_NF];
-                2'b00: ExpNonZero = |Len2[`S_LEN-2:`S_NF]; 
-                2'b10: ExpNonZero = |Len3[`H_LEN-2:`H_NF]; 
+                2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
+                2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
+                2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; 
            endcase

-        // is the input (in it's original format) denormalized
-        assign Denorm = ~ExpNonZero & ~FracZero;
-

        // example double to single conversion:
        // 1023 = 0011 1111 1111
@ -244,10 +213,10 @@ module unpackinput (
        // convert the double precsion exponent into quad precsion
        always_comb
            case (FmtE)
-                2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|Denorm};
-                2'b01: Exp = {Len1[`D_LEN-2], {`Q_NE-`D_NE{~Len1[`D_LEN-2]}}, Len1[`D_LEN-3:`D_NF+1], Len1[`D_NF]|Denorm};
-                2'b00: Exp = {Len2[`S_LEN-2], {`Q_NE-`S_NE{~Len2[`S_LEN-2]}}, Len2[`S_LEN-3:`S_NF+1], Len2[`S_NF]|Denorm};
-                2'b10: Exp = {Len3[`H_LEN-2], {`Q_NE-`H_NE{~Len3[`H_LEN-2]}}, Len3[`H_LEN-3:`H_NF+1], Len3[`H_NF]|Denorm}; 
+                2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
+                2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
+                2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
+                2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; 
            endcase


@ -255,19 +224,18 @@ module unpackinput (
        always_comb 
            case (FmtE)
                2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
-                2'b01: ExpMax = &Len1[`D_LEN-2:`D_NF];
-                2'b00: ExpMax = &Len2[`S_LEN-2:`S_NF];
-                2'b10: ExpMax = &Len3[`H_LEN-2:`H_NF];
+                2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
+                2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
+                2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
            endcase

    end

    // Output logic
-    assign ExpZero = ~ExpNonZero; // is the exponent all 0's?
+    assign FracZero = ~|Frac; // is the fraction zero?
    assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
-    //   ***  - force to be a NaN if it isn't properly Nan Boxed
-    assign NaN = ExpMax & ~FracZero; // is the input a NaN?
-    assign SNaN = NaN&~Frac[`NF-1]; // is the input a singnaling NaN?
+    assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN? (an improperly NaN-boxed input is treated as a NaN)
+    assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a signaling NaN? (an improperly NaN-boxed input is never signaling)
    assign Inf = ExpMax & FracZero; // is the input infinity?
-    assign Zero = ExpZero & FracZero; // is the input zero?
+    assign Zero = ~ExpNonZero & FracZero; // is the input zero?
 endmodule
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@ -1,5 +1,5 @@
 //leading zero counter i.e. priority encoder
-module lzc #(parameter WIDTH=1) (
+module lzc #(parameter WIDTH = 1) (
    input logic  [WIDTH-1:0]            num,
    output logic [$clog2(WIDTH+1)-1:0]  ZeroCnt
 );
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@ -44,7 +44,7 @@ module srt #(parameter Nf=52) (
  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
  input  logic       W64, // 32-bit ints on XLEN=64
  input  logic       Signed, // Interpret integers as signed 2's complement
-  input  logic       Int, // Choose integer inputss
+  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic       rsign,
  output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
@ -52,7 +52,7 @@ module srt #(parameter Nf=52) (
  output logic [3:0] Flags
 );

-  logic          qp, qz, qm; // quotient is +1, 0, or -1
+  logic           qp, qz, qm; // quotient is +1, 0, or -1
  logic [`NE-1:0] calcExp;
  logic           calcSign;
  logic [Nf-1:0]  X, Dpreproc;
@ -223,17 +223,17 @@ module otfc2 #(parameter N=52) (
  output logic [N-1:0] r
 );

-  // The on-the-fly converter transfers the quotient 
+  //  The on-the-fly converter transfers the quotient 
  //  bits to the quotient as they come. 
  //
-  // This code follows the psuedocode presented in the 
+  //  This code follows the pseudocode presented in the 
  //  floating point chapter of the book. Right now, 
  //  it is written for Radix-2 division.
  //
-  // QM is Q-1. It allows us to write negative bits 
+  //  QM is Q-1. It allows us to write negative bits 
  //  without using a costly CPA. 
  logic [N+2:0] Q, QM, QNext, QMNext;
-  // QR and QMR are the shifted versions of Q and QM.
+  //  QR and QMR are the shifted versions of Q and QM.
  //  They are treated as [N-1:r] size signals, and 
  //  discard the r most significant bits of Q and QM. 
  logic [N+1:0] QR, QMR;
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -79,7 +79,6 @@ module testbenchfp;
  logic [`NF:0]         FmaRuXMan, FmaRuYMan, FmaRuZMan;
  logic [`NF:0]         FmaRdXMan, FmaRdYMan, FmaRdZMan;
  logic [`NF:0]         FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
-  logic                 XNorm;                                // is X normal
  logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
  logic                 FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
  logic                 FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
@ -92,12 +91,12 @@ module testbenchfp;
  logic                 FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
  logic                 FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
  logic                 FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
-  logic                 XDenorm, YDenorm, ZDenorm;            // is the input denormalized
-  logic                 FmaRneXDenorm, FmaRneYDenorm, FmaRneZDenorm;
-  logic                 FmaRzXDenorm, FmaRzYDenorm, FmaRzZDenorm;
-  logic                 FmaRuXDenorm, FmaRuYDenorm, FmaRuZDenorm;
-  logic                 FmaRdXDenorm, FmaRdYDenorm, FmaRdZDenorm;
-  logic                 FmaRnmXDenorm, FmaRnmYDenorm, FmaRnmZDenorm;
+  logic                 XDenorm, ZDenorm;            // is the input denormalized
+  logic                 FmaRneXDenorm, FmaRneZDenorm;
+  logic                 FmaRzXDenorm, FmaRzZDenorm;
+  logic                 FmaRuXDenorm, FmaRuZDenorm;
+  logic                 FmaRdXDenorm, FmaRdZDenorm;
+  logic                 FmaRnmXDenorm, FmaRnmZDenorm;
  logic                 XInf, YInf, ZInf;                   // is the input infinity
  logic                 FmaRneXInf, FmaRneYInf, FmaRneZInf;
  logic                 FmaRzXInf, FmaRzYInf, FmaRzZInf;
@ -683,7 +682,7 @@ module testbenchfp;
                                    .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
                                    .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
-                                    .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), 
+                                    .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), 
                                    .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
                                    .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
                                    .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
@ -693,7 +692,7 @@ module testbenchfp;
                                    .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
                                    .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
-                                    .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), 
+                                    .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), 
                                    .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
                                    .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
@ -703,7 +702,7 @@ module testbenchfp;
                                    .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
                                    .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
-                                    .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), 
+                                    .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), 
                                    .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
                                    .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
@ -713,7 +712,7 @@ module testbenchfp;
                                    .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
                                    .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
-                                    .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), 
+                                    .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), 
                                    .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
                                    .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
@ -723,7 +722,7 @@ module testbenchfp;
                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
                                    .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
                                    .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
-                                    .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), 
+                                    .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), 
                                    .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
                                    .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
@ -733,9 +732,9 @@ module testbenchfp;
                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
                                    .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                    .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
-                                    .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), 
+                                    .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
                                    .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf),.XNormE(XNorm), .XExpMaxE(XExpMax),
+                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax),
                                    .X, .Y, .Z);


@ -1294,13 +1293,13 @@ module readfmavectors (
  output logic [`NF:0]        XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
  output logic                XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
  output logic                XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                XDenormE, ZDenormE,   // is X or Z denormalized
  output logic                XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                XInfE, YInfE, ZInfE,            // is XYZ infinity
  output logic [`FLEN-1:0]    X, Y, Z                 // inputs
 );

-  logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA
+  logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
  // apply test vectors on rising edge of clk
  // Format of vectors Inputs(1/2/3)_AnsFlg
  always @(posedge clk) begin
@ -1335,7 +1334,7 @@ module readfmavectors (
  end
  
  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
                .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                .XExpMaxE, .ZDenormE);
 endmodule
@ -1373,10 +1372,10 @@ module readvectors (
  output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
  output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
  output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                    XDenormE, ZDenormE,   // is X or Z denormalized
  output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XNormE, XExpMaxE,
+  output logic XExpMaxE,
  output logic [`FLEN-1:0] X, Y, Z
 );

@ -1660,7 +1659,7 @@ module readvectors (
  end
  
  unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                .XExpMaxE);
 endmodule
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -128,7 +128,8 @@ logic [3:0] dummy;
    end
  end

-  string signame, memfilename, pathname, objdumpfilename, adrstr;
+  string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
+  integer outputFilePointer;

  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
  logic UARTSin, UARTSout;
@ -213,70 +214,88 @@ logic [3:0] dummy;
          $display("Benchmark: coremark is done.");
          $stop;
        end
+      // Termination condition (i.e. we finished running current test) 
      if (DCacheFlushDone) begin
- 
-        #600; // give time for instructions in pipeline to finish
-        // clear signature to prevent contamination from previous tests
-        for(i=0; i<SIGNATURESIZE; i=i+1) begin
-          sig32[i] = 'bx;
-        end
-
-        // read signature, reformat in 64 bits if necessary
-        signame = {pathname, tests[test], ".signature.output"};
-        $readmemh(signame, sig32);
-        i = 0;
-        while (i < SIGNATURESIZE) begin
-          if (`XLEN == 32) begin
-            signature[i] = sig32[i];
-            i = i+1;
-          end else begin
-            signature[i/2] = {sig32[i+1], sig32[i]};
-            i = i + 2;
-          end
-          if (i >= 4 & sig32[i-4] === 'bx) begin
-            if (i == 4) begin
-              i = SIGNATURESIZE+1; // flag empty file
-              $display("  Error: empty test file");
-            end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
-          end
-        end
-
-        // Check errors
-        errors = (i == SIGNATURESIZE+1); // error if file is empty
-        i = 0;
+        // Gets the memory location of begin_signature
        testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
        testadrNoBase = (tests[test+1].atohex())/(`XLEN/8);
-        /* verilator lint_off INFINITELOOP */
-        while (signature[i] !== 'bx) begin
-          logic [`XLEN-1:0] sig;
-          if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
-          else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
-          //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
-          if (signature[i] !== sig &
-          //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
-	      (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
-            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
-              // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
-              // report errors unless they are garbage at the end of the sim
-              // kind of hacky test for garbage right now
-              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
-              errors = errors+1;
-              $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
-                    tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
-                    //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
-              $stop;//***debug
+        #600; // give time for instructions in pipeline to finish
+        if (TEST == "embench") begin
+          // Writes the first five words starting at begin_signature (read through DCacheFlushFSM.ShadowRAM) to the .sim.output file
+          // these contain instret and cycles for the start and end of the test run, used by the embench python speed script to calculate the embench speed score
+          // begin_signature also contains the results of the self-checking mechanism, which will be read by the python script for error checking
+          $display("Embench Benchmark: %s is done.", tests[test]);
+          outputfile = {pathname, tests[test], ".sim.output"};
+          outputFilePointer = $fopen(outputfile);
+          i = 0;
+          while ($unsigned(i) < $unsigned(5'd5)) begin
+            $fdisplayh(outputFilePointer, DCacheFlushFSM.ShadowRAM[testadr+i]);
+            i = i + 1;
+          end
+          $fclose(outputFilePointer);
+          $display("Embench Benchmark: created output file: %s", outputfile);
+        end else begin 
+          // for tests with no self checking mechanism, read .signature.output file and compare to check for errors
+          // clear signature to prevent contamination from previous tests
+          for(i=0; i<SIGNATURESIZE; i=i+1) begin
+            sig32[i] = 'bx;
+          end
+          // read signature, reformat in 64 bits if necessary
+          signame = {pathname, tests[test], ".signature.output"};
+          $readmemh(signame, sig32);
+          i = 0;
+          while (i < SIGNATURESIZE) begin
+            if (`XLEN == 32) begin
+              signature[i] = sig32[i];
+              i = i+1;
+            end else begin
+              signature[i/2] = {sig32[i+1], sig32[i]};
+              i = i + 2;
+            end
+            if (i >= 4 & sig32[i-4] === 'bx) begin
+              if (i == 4) begin
+                i = SIGNATURESIZE+1; // flag empty file
+                $display("  Error: empty test file");
+              end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
            end
          end
-          i = i + 1;
-        end
-        /* verilator lint_on INFINITELOOP */
-        if (errors == 0) begin
-          $display("%s succeeded.  Brilliant!!!", tests[test]);
-        end
-        else begin
-          $display("%s failed with %d errors. :(", tests[test], errors);
-          totalerrors = totalerrors+1;
+
+          // Check errors
+          errors = (i == SIGNATURESIZE+1); // error if file is empty
+          i = 0;
+          /* verilator lint_off INFINITELOOP */
+          while (signature[i] !== 'bx) begin
+            logic [`XLEN-1:0] sig;
+            if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
+            else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
+            //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
+            if (signature[i] !== sig &
+            //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
+            (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
+              if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
+                // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
+                // report errors unless they are garbage at the end of the sim
+                // kind of hacky test for garbage right now
+                $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
+                errors = errors+1;
+                $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
+                      tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
+                      //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
+                $stop;//***debug
+              end
+            end
+            i = i + 1;
+          end
+          /* verilator lint_on INFINITELOOP */
+          if (errors == 0) begin
+            $display("%s succeeded.  Brilliant!!!", tests[test]);
+          end
+          else begin
+            $display("%s failed with %d errors. :(", tests[test], errors);
+            totalerrors = totalerrors+1;
+          end
        end
+        // move on to the next test and check whether we're done
        test = test + 2;
        if (test == tests.size()) begin
          if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
@ -284,6 +303,7 @@ logic [3:0] dummy;
          $stop;
        end
        else begin
+            // If there are still additional tests to run, read in information for the next test
            //pathname = tvpaths[tests[0]];
            memfilename = {pathname, tests[test], ".elf.memfile"};
            //$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@ -71,7 +71,9 @@ if { $saifpower == 1 } {
 }

 # Set reset false path
-set_false_path -from [get_ports reset]
+if {$drive != "INV"} {
+    set_false_path -from [get_ports reset]
+}

 # Set Frequency in [MHz] or period in [ns]
 set my_clock_pin clk
@ -112,13 +114,13 @@ set all_in_ex_clk [remove_from_collection [all_inputs] [get_ports $my_clk]]
 if {$tech == "sky130"} {
    set_driving_cell  -lib_cell sky130_osu_sc_12T_ms__dff_1 -pin Q $all_in_ex_clk
 } elseif {$tech == "sky90"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_driving_cell -lib_cell scc9gena_inv_1 -pin Y $all_in_ex_clk
    } else {
 	set_driving_cell  -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk
    }
 } elseif {$tech == "tsmc28"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk
    }
 }
@ -131,13 +133,13 @@ set_output_delay 0.0 -max -clock $my_clk [all_outputs]
 if {$tech == "sky130"} {
    set_load [expr [load_of sky130_osu_sc_12T_ms_TT_1P8_25C.ccs/sky130_osu_sc_12T_ms__dff_1/D] * 1] [all_outputs]
 } elseif {$tech == "sky90"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_inv_4/A] * 1] [all_outputs]
    } else {
        set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs]
    }
 } elseif {$tech == "tsmc28"} {
-    if ($drive == "INV") {
+    if {$drive == "INV"} {
 	set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs]
    }
 }
@ -323,21 +325,15 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -

 set filename [format "%s%s%s%s" $outputDir  "/reports/" $my_toplevel "_fpu_timing.rep"]
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/*} -nworst 1 }
+redirect -append $filename { echo "\n\n\n//// Critical paths through fma1 ////\n\n\n" }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma1/*} -nworst 1 }
+redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fdivsqrt/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through faddcvt ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.faddcvt/*} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FMAResM ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FMAResM} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FDivResM ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FDivResM} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through FResE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.FResE} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through fma/SumE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/SumE} -nworst 1 }
-redirect -append $filename { echo "\n\n\n//// Critical paths through fma/ProdExpE ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fpu/fpu.fma/ProdExpE} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {faddcvt/*} -nworst 1 }

 set filename [format "%s%s%s%s" $outputDir  "/reports/" $my_toplevel "_mmu_timing.rep"]
 redirect -append $filename { echo "\n\n\n//// Critical paths through immu/physicaladdress ////\n\n\n" }