Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally

2025-02-11 06:05:49 +00:00 · 2022-06-01 21:02:55 +00:00 · 2022-06-01 21:02:55 +00:00 · f550fdb7e7
commit f550fdb7e7
parent 9970654e56 4fbce9fc45
18 changed files with 462 additions and 686 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@ -1,18 +1,28 @@
 # Makefile added 1/20/22 David_Harris@hmc.edu
+# Expanded and developed by dtorres@hmc.edu
 # Compile Embench for Wally

 all: build sim

 allClean: clean all

-build:
+build: buildspeed buildsize
+
+buildspeed:
 	../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
 	find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
-	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"

-sim: modelSimBuild size speed
+buildsize:
+	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"

-modelSimBuild: objdump
+sim: modelSimBuild speed
+
+# vsim:
+# 	cd ../../pipelined/regression/
+# 	vsim -c -do "do wally-pipelined-batch.do rv32gc embench"
+# 	cd ../../benchmarks/embench/
+
+modelSimBuild: buildspeed objdump
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
 	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done

@ -20,15 +30,18 @@ size:
 	../../addins/embench-iot/benchmark_size.py --builddir=bd_size

 speed:
-	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
+	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1

-objdump:
-	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S "$$f" > "$$f.objdump"; done
+objdump: buildspeed
+	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done

 clean: 
 	rm -rf ../../addins/embench-iot/bd_speed/
 	rm -rf ../../addins/embench-iot/bd_size/

+allclean: clean
+	rm -rf ../../addins/embench-iot/logs/
+
 # std:
 # 	../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" 
 # 	riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
@ -37,3 +50,5 @@ clean:
 # ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
 # --user-libs="-lm" 
 # riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
+
+# find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump.lab" | while read f; do grep -n "begin_signature" $f | cut -f1 -d:
--- a/pipelined/regression/sim-fp-batch
+++ b/pipelined/regression/sim-fp-batch
@ -7,4 +7,4 @@
 # sqrt   - test square root
 # all    - test everything

-vsim -c -do "do fp.do rv64fp fma"
+vsim -c -do "do fp.do rv64fp cvtfp"
--- a/pipelined/regression/wave.do
+++ b/pipelined/regression/wave.do
@ -76,8 +76,6 @@ add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/PMPCFG_ARRAY_RE
 add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SATP_REGW
 add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SCOUNTEREN_REGW
 add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SEPC_REGW
-add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SIE_REGW
-add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SIP_REGW
 add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/SSTATUS_REGW
 add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/STVEC_REGW
 add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
@ -420,8 +418,6 @@ add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
 add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
 add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
 add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
-add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
 add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HCLK
 add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HRESETn
 add wave -noupdate -group uart -expand -group {Bus Connection} /testbench/dut/uncore/uart/uart/HSELUART
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@ -5,7 +5,6 @@ module fclassify (
    input logic         XSgnE,  // sign bit
    input logic         XNaNE,  // is NaN
    input logic         XSNaNE, // is signaling NaN
-    input logic         XNormE, // is normal
    input logic         XDenormE, // is denormal
    input logic         XZeroE, // is zero
    input logic         XInfE,  // is infinity
@ -14,9 +13,10 @@ module fclassify (

    logic PInf, PZero, PNorm, PDenorm;
    logic NInf, NZero, NNorm, NDenorm;
-
+    logic XNormE;
   
    // determine the sub categories
+    assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
    assign PInf = ~XSgnE&XInfE;
    assign NInf = XSgnE&XInfE;
    assign PNorm = ~XSgnE&XNormE;
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -42,7 +42,7 @@ module fcvt (
    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
    logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
    logic [`NE:0]           CalcExp;    // the calculated expoent
-	logic [$clog2(`LGLEN)-1:0] ShiftAmt;  // how much to shift by
+	logic [$clog2(`LGLEN+1)-1:0] ShiftAmt;  // how much to shift by
    logic [`LGLEN+`NF:0]    ShiftIn;    // number to be shifted
    logic                   ResDenormUf;// does the result underflow or is denormalized
    logic                   ResUf;      // does the result underflow
@ -72,7 +72,7 @@ module fcvt (
    logic                   Int64;      // is the integer 64 bits?
    logic                   IntToFp;       // is the opperation an int->fp conversion?
    logic                   ToInt;      // is the opperation an fp->int conversion?
-    logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC
+    logic [$clog2(`LGLEN+1)-1:0] ZeroCnt; // output from the LZC


    // seperate OpCtrl for code readability
@ -143,9 +143,9 @@ module fcvt (
    //              - only shift fp -> fp if the intital value is denormalized
    //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} :
-                    ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] : 
-                              (ZeroCnt+1)&{$clog2(`LGLEN){XDenormE|IntToFp}};
+    assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN+1)-1:0]&{$clog2(`LGLEN+1){~CalcExp[`NE]}} :
+                    ResDenormUf&~IntToFp ? ($clog2(`LGLEN+1))'(`NF-1)+CalcExp[$clog2(`LGLEN+1)-1:0] : 
+                              (ZeroCnt+1)&{$clog2(`LGLEN+1){XDenormE|IntToFp}};
    
    // shift
    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
@ -255,13 +255,13 @@ module fcvt (
    //                  |     keep        |
    //
    //              - if the input is denormalized then we dont shift... so the  "- (ZeroCnt+1)" is just leftovers from other options
-    //      int -> fp : largest bias  XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
+    //      int -> fp : largest bias +  XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
    //              Process:
    //                  - shifted right by XLEN (XLEN)
    //                  - shift left to normilize (-1-ZeroCnt)
    //                  - newBias to make the biased exponent
    //          
-    assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XDenormE|IntToFp}})};
+    assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN+1)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN+1){XDenormE|IntToFp}})};
    // find if the result is dnormal or underflows
    //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
    //      - can't underflow an integer to Fp conversion
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -472,7 +472,7 @@ module fma2(
    // Select the result
    ///////////////////////////////////////////////////////////////////////////////

-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
+    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
@ -1002,6 +1002,7 @@ module resultselect(
    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                     ZDenormM, // is the original precision denormalized
+    input logic 		            ZZeroM,
    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
    input logic                     PSgnM,      // the product's sign
    input logic                     ResultSgn,  // the result's sign
@ -1027,7 +1028,7 @@ module resultselect(
        end
        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1046,7 +1047,7 @@ module resultselect(
                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1066,7 +1067,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1082,7 +1083,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1099,7 +1100,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
@ -1137,7 +1138,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1153,7 +1154,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
@ -1170,7 +1171,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
@ -1188,7 +1189,7 @@ module resultselect(
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      

-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -95,7 +95,7 @@ module fpu (
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, YDenormE, ZDenormE;       // is the input denormalized
+   logic 		  XDenormE, ZDenormE;       // is the input denormalized
   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
@ -103,7 +103,6 @@ module fpu (
   logic 		  XInfM, YInfM, ZInfM;                // is the input infinity - memory stage
   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
-   logic 		  XNormE;                             // is normal
   logic 		  FmtQ;
   logic 		  FOpCtrlQ;     

@ -177,8 +176,8 @@ module fpu (
   //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
+         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);

   // FMA
   //   - two stage FMA
@ -215,7 +214,7 @@ module fpu (
   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
   fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
-   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
+   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
              .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);

--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -6,553 +6,31 @@ module unpack (
    output logic                    XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
    output logic [`NE-1:0]          XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
    output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-    output logic                    XNormE,                 // is X a normalized number
    output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
    output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-    output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+    output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
    output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
    output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
    output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
 );
 
    logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
-    logic           XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero
+    logic           XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
    logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
-    logic           XExpZero, YExpZero, ZExpZero; // is the exponent zero
    logic           YExpMaxE, ZExpMaxE;  // is the exponent all 1s
    
-    if (`FPSIZES == 1) begin        // if there is only one floating point format supported
-
-        // sign bit
-        assign XSgnE = X[`FLEN-1];
-        assign YSgnE = Y[`FLEN-1];
-        assign ZSgnE = Z[`FLEN-1];
-
-        // exponent
-        assign XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
-        assign YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
-        assign ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
-
-        // fraction (no assumed 1)
-        assign XFracE = X[`NF-1:0];
-        assign YFracE = Y[`NF-1:0];
-        assign ZFracE = Z[`NF-1:0];
-
-        // is the exponent non-zero
-        assign XExpNonzero = |XExpE; 
-        assign YExpNonzero = |YExpE;
-        assign ZExpNonzero = |ZExpE;
-
-        // is the exponent all 1's
-        assign XExpMaxE = &XExpE;
-        assign YExpMaxE = &YExpE;
-        assign ZExpMaxE = &ZExpE;
-
-
-        // is the input (in it's original format) denormalized
-        assign XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero; 
-        assign YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero; 
-        assign ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero; 
-    
-
-    end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
-        //***need better names for these constants
-        // largest format | smaller format
-        //----------------------------------
-        //      `FLEN     |     `LEN1       length of floating point number
-        //      `NE       |     `NE1        length of exponent
-        //      `NF       |     `NF1        length of fraction
-        //      `BIAS     |     `BIAS1      exponent's bias value
-        //      `FMT      |     `FMT1       precision's format value - Q=11 D=01 S=00 H=10
-
-        // Possible combinantions specified by spec:
-        //      double and single
-        //      single and half
-
-        // Not needed but can also handle:
-        //      quad   and double
-        //      quad   and single
-        //      quad   and half
-        //      double and half
-
-        logic  [`LEN1-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
-        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};  
-
-        // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-        assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1];
-        assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1];
-        assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1];
-
-        // example double to single conversion:
-        // 1023 = 0011 1111 1111
-        // 127  = 0000 0111 1111 (subtract this)
-        // 896  = 0011 1000 0000
-        // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-        // dexp = 0bdd dbbb bbbb 
-        // also need to take into account possible zero/denorm/inf/NaN values
-
-        // extract the exponent, converting the smaller exponent into the larger precision if nessisary
-        //      - if the original precision had a denormal number convert the exponent value 1
-        assign XExpE = FmtE ? {X[`FLEN-2:`NF+1], X[`NF]|XDenormE} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE}; 
-        assign YExpE = FmtE ? {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE}; 
-        assign ZExpE = FmtE ? {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE}; 
-
-        // is the input (in it's original format) denormalized
-
-                    // is the input (in it's original format) denormalized
-        assign XDenormE = (FmtE ? ~|X[`FLEN-2:`NF] : ~|XLen1[`LEN1-2:`NF1]) & ~XFracZero; 
-        assign YDenormE = (FmtE ? ~|Y[`FLEN-2:`NF] : ~|YLen1[`LEN1-2:`NF1]) & ~YFracZero; 
-        assign ZDenormE = (FmtE ? ~|Z[`FLEN-2:`NF] : ~|ZLen1[`LEN1-2:`NF1]) & ~ZFracZero; 
-
-        // extract the fraction, add trailing zeroes to the mantissa if nessisary
-        assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-        assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-        assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-
-        // is the exponent non-zero
-        assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1]; 
-        assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1];
-        assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1];
-
-        // is the exponent all 1's
-        assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1];
-        assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1];
-        assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1];
-    
-
-    end else if (`FPSIZES == 3) begin       // three floating point precsions supported
-
-        //***need better names for these constants
-        // largest format | larger format  | smallest format
-        //---------------------------------------------------
-        //      `FLEN     |     `LEN1      |    `LEN2       length of floating point number
-        //      `NE       |     `NE1       |    `NE2        length of exponent
-        //      `NF       |     `NF1       |    `NF2        length of fraction
-        //      `BIAS     |     `BIAS1     |    `BIAS2      exponent's bias value
-        //      `FMT      |     `FMT1      |    `FMT2       precision's format value - Q=11 D=01 S=00 H=10
-
-        // Possible combinantions specified by spec:
-        //      quad   and double and single
-        //      double and single and half
-
-        // Not needed but can also handle:
-        //      quad   and double and half
-        //      quad   and single and half
-
-        logic  [`LEN1-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
-        logic  [`LEN2-1:0]  XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
-        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
-        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; 
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision
-        assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
-        assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
-        assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; 
-
-        // There are 2 case statements
-        //      - one for other singals and one for sgn/exp/frac
-        //      - need two for the dependencies in the expoenent calculation
-        //*** pull out the ~FracZero and and it at the end
-        always_comb begin
-            case (FmtE)
-                `FMT: begin // if input is largest precision (`FLEN - ie quad or double)
-
-                    // This is the original format so set OrigDenorm to 0
-                    XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero; 
-                    YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero; 
-                    ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |X[`FLEN-2:`NF]; 
-                    YExpNonzero = |Y[`FLEN-2:`NF];
-                    ZExpNonzero = |Z[`FLEN-2:`NF];
-
-                    // is the exponent all 1's
-                    XExpMaxE = &X[`FLEN-2:`NF];
-                    YExpMaxE = &Y[`FLEN-2:`NF];
-                    ZExpMaxE = &Z[`FLEN-2:`NF];
-                end
-                `FMT1: begin    // if input is larger precsion (`LEN1 - double or single)
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; 
-                    YDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; 
-                    ZDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |XLen1[`LEN1-2:`NF1]; 
-                    YExpNonzero = |YLen1[`LEN1-2:`NF1];
-                    ZExpNonzero = |ZLen1[`LEN1-2:`NF1];
-
-                    // is the exponent all 1's
-                    XExpMaxE = &XLen1[`LEN1-2:`NF1];
-                    YExpMaxE = &YLen1[`LEN1-2:`NF1];
-                    ZExpMaxE = &ZLen1[`LEN1-2:`NF1];
-                end
-                `FMT2: begin        // if input is smallest precsion (`LEN2 - single or half)
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero; 
-                    YDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero; 
-                    ZDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |XLen2[`LEN2-2:`NF2]; 
-                    YExpNonzero = |YLen2[`LEN2-2:`NF2];
-                    ZExpNonzero = |ZLen2[`LEN2-2:`NF2];
-
-                    // is the exponent all 1's
-                    XExpMaxE = &XLen2[`LEN2-2:`NF2];
-                    YExpMaxE = &YLen2[`LEN2-2:`NF2];
-                    ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
-                end
-                default: begin
-                    XDenormE = 0; 
-                    YDenormE = 0; 
-                    ZDenormE = 0; 
-                    XExpNonzero = 0; 
-                    YExpNonzero = 0;
-                    ZExpNonzero = 0;
-                    XExpMaxE = 0;
-                    YExpMaxE = 0;
-                    ZExpMaxE = 0;
-                end
-            endcase
-        end
-        always_comb begin
-            case (FmtE)
-                `FMT: begin // if input is largest precision (`FLEN - ie quad or double)
-                    // extract the sign bit
-                    XSgnE = X[`FLEN-1];
-                    YSgnE = Y[`FLEN-1];
-                    ZSgnE = Z[`FLEN-1];
-
-                    // extract the exponent
-                    XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
-                    YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
-                    ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};
-
-                    // extract the fraction
-                    XFracE = X[`NF-1:0];
-                    YFracE = Y[`NF-1:0];
-                    ZFracE = Z[`NF-1:0];
-                end
-                `FMT1: begin    // if input is larger precsion (`LEN1 - double or single)
-
-                    // extract the sign bit
-                    XSgnE = XLen1[`LEN1-1];
-                    YSgnE = YLen1[`LEN1-1];
-                    ZSgnE = ZLen1[`LEN1-1];
-
-                    // example double to single conversion:
-                    // 1023 = 0011 1111 1111
-                    // 127  = 0000 0111 1111 (subtract this)
-                    // 896  = 0011 1000 0000
-                    // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-                    // dexp = 0bdd dbbb bbbb 
-                    // also need to take into account possible zero/denorm/inf/NaN values
-
-                    // convert the larger precision's exponent to use the largest precision's bias
-                    XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE}; 
-                    YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE}; 
-                    ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE}; 
-
-                    // extract the fraction and add the nessesary trailing zeros
-                    XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-                    YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-                    ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
-                end
-                `FMT2: begin        // if input is smallest precsion (`LEN2 - single or half)
-
-                    // exctract the sign bit
-                    XSgnE = XLen2[`LEN2-1];
-                    YSgnE = YLen2[`LEN2-1];
-                    ZSgnE = ZLen2[`LEN2-1];
-
-                    // example double to single conversion:
-                    // 1023 = 0011 1111 1111
-                    // 127  = 0000 0111 1111 (subtract this)
-                    // 896  = 0011 1000 0000
-                    // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-                    // dexp = 0bdd dbbb bbbb 
-                    // also need to take into account possible zero/denorm/inf/NaN values
-                    
-                    // convert the smallest precision's exponent to use the largest precision's bias
-                    XExpE = XDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]}; 
-                    YExpE = YDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]}; 
-                    ZExpE = ZDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]}; 
-
-                    // extract the fraction and add the nessesary trailing zeros
-                    XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
-                    YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)};
-                    ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)};
-                end
-                default: begin
-                    XSgnE = 0;
-                    YSgnE = 0;
-                    ZSgnE = 0;
-                    XExpE = 0; 
-                    YExpE = 0;
-                    ZExpE = 0; 
-                    XFracE = 0;
-                    YFracE = 0;
-                    ZFracE = 0;
-                end
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin      // if all precsisons are supported - quad, double, single, and half
-    
-        //    quad   |  double  |  single  |  half    
-        //-------------------------------------------------------------------
-        //   `Q_LEN  |  `D_LEN  |  `S_LEN  |  `H_LEN     length of floating point number
-        //   `Q_NE   |  `D_NE   |  `S_NE   |  `H_NE      length of exponent
-        //   `Q_NF   |  `D_NF   |  `S_NF   |  `H_NF      length of fraction
-        //   `Q_BIAS |  `D_BIAS |  `S_BIAS |  `H_BIAS    exponent's bias value
-        //   `Q_FMT  |  `D_FMT  |  `S_FMT  |  `H_FMT     precision's format value - Q=11 D=01 S=00 H=10
-
-
-        logic  [`D_LEN-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
-        logic  [`S_LEN-1:0]  XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
-        logic  [`H_LEN-1:0]  XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
-        
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
-        assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
-        assign YLen1 = &Y[`Q_LEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
-        assign ZLen1 = &Z[`Q_LEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; 
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision
-        assign XLen2 = &X[`Q_LEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
-        assign YLen2 = &Y[`Q_LEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
-        assign ZLen2 = &Z[`Q_LEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; 
-
-        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision
-        assign XLen3 = &X[`Q_LEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
-        assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
-        assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; 
-
-
-        // There are 2 case statements
-        //      - one for other singals and one for sgn/exp/frac
-        //      - need two for the dependencies in the expoenent calculation
-        always_comb begin
-            case (FmtE)
-                2'b11: begin  // if input is quad percision
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|X[`Q_LEN-2:`Q_NF] & ~XFracZero; 
-                    YDenormE = ~|Y[`Q_LEN-2:`Q_NF] & ~YFracZero; 
-                    ZDenormE = ~|Z[`Q_LEN-2:`Q_NF] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |X[`Q_LEN-2:`Q_NF]; 
-                    YExpNonzero = |Y[`Q_LEN-2:`Q_NF];
-                    ZExpNonzero = |Z[`Q_LEN-2:`Q_NF];
-
-                    // is the exponent all 1's
-                    XExpMaxE = &X[`Q_LEN-2:`Q_NF];
-                    YExpMaxE = &Y[`Q_LEN-2:`Q_NF];
-                    ZExpMaxE = &Z[`Q_LEN-2:`Q_NF];
-                end
-                2'b01: begin  // if input is double percision
-
-                    // is the exponent all 1's
-                    XExpMaxE = &XLen1[`D_LEN-2:`D_NF];
-                    YExpMaxE = &YLen1[`D_LEN-2:`D_NF];
-                    ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero; 
-                    YDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero; 
-                    ZDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; 
-                    YExpNonzero = |YLen1[`D_LEN-2:`D_NF];
-                    ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF];
-                end
-                2'b00: begin      // if input is single percision
-
-                    // is the exponent all 1's
-                    XExpMaxE = &XLen2[`S_LEN-2:`S_NF];
-                    YExpMaxE = &YLen2[`S_LEN-2:`S_NF];
-                    ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero; 
-                    YDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero; 
-                    ZDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; 
-                    YExpNonzero = |YLen2[`S_LEN-2:`S_NF];
-                    ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF];
-                end
-                2'b10: begin      // if input is half percision
-
-                    // is the exponent all 1's
-                    XExpMaxE = &XLen3[`H_LEN-2:`H_NF];
-                    YExpMaxE = &YLen3[`H_LEN-2:`H_NF];
-                    ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
-
-                    // is the input (in it's original format) denormalized
-                    XDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero; 
-                    YDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero; 
-                    ZDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero; 
-
-                    // is the exponent non-zero
-                    XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; 
-                    YExpNonzero = |YLen3[`H_LEN-2:`H_NF];
-                    ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF];
-                end
-            endcase
-        end
-
-        always_comb begin
-            case (FmtE)
-                2'b11: begin  // if input is quad percision
-                    // extract sign bit
-                    XSgnE = X[`Q_LEN-1];
-                    YSgnE = Y[`Q_LEN-1];
-                    ZSgnE = Z[`Q_LEN-1];
-
-                    // extract the exponent
-                    XExpE = {X[`Q_LEN-2:`Q_NF+1], X[`Q_NF]|XDenormE};
-                    YExpE = {Y[`Q_LEN-2:`Q_NF+1], Y[`Q_NF]|YDenormE};
-                    ZExpE = {Z[`Q_LEN-2:`Q_NF+1], Z[`Q_NF]|ZDenormE};
-
-                    // extract the fraction
-                    XFracE = X[`Q_NF-1:0];
-                    YFracE = Y[`Q_NF-1:0];
-                    ZFracE = Z[`Q_NF-1:0];
-                end
-                2'b01: begin  // if input is double percision
-                    // extract sign bit
-                    XSgnE = XLen1[`D_LEN-1];
-                    YSgnE = YLen1[`D_LEN-1];
-                    ZSgnE = ZLen1[`D_LEN-1];
-
-                    // example double to single conversion:
-                    // 1023 = 0011 1111 1111
-                    // 127  = 0000 0111 1111 (subtract this)
-                    // 896  = 0011 1000 0000
-                    // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-                    // dexp = 0bdd dbbb bbbb 
-                    // also need to take into account possible zero/denorm/inf/NaN values
-                    
-                    // convert the double precsion exponent into quad precsion
-
-                    XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF+1], XLen1[`D_NF]|XDenormE}; 
-                    YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF+1], YLen1[`D_NF]|YDenormE}; 
-                    ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF+1], ZLen1[`D_NF]|ZDenormE}; 
-
-                    // extract the fraction and add the nessesary trailing zeros
-                    XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-                    YFracE = {YLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-                    ZFracE = {ZLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
-                end
-                2'b00: begin      // if input is single percision
-                    // extract sign bit
-                    XSgnE = XLen2[`S_LEN-1];
-                    YSgnE = YLen2[`S_LEN-1];
-                    ZSgnE = ZLen2[`S_LEN-1];
-
-                    // example double to single conversion:
-                    // 1023 = 0011 1111 1111
-                    // 127  = 0000 0111 1111 (subtract this)
-                    // 896  = 0011 1000 0000
-                    // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-                    // dexp = 0bdd dbbb bbbb 
-                    // also need to take into account possible zero/denorm/inf/NaN values
-                    
-                    // convert the single precsion exponent into quad precsion
-                    XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF+1], XLen2[`S_NF]|XDenormE}; 
-                    YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF+1], YLen2[`S_NF]|YDenormE}; 
-                    ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF+1], ZLen2[`S_NF]|ZDenormE}; 
-
-                    // extract the fraction and add the nessesary trailing zeros
-                    XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-                    YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-                    ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
-                end
-                2'b10: begin      // if input is half percision
-                    // extract sign bit
-                    XSgnE = XLen3[`H_LEN-1];
-                    YSgnE = YLen3[`H_LEN-1];
-                    ZSgnE = ZLen3[`H_LEN-1];
-
-                    // example double to single conversion:
-                    // 1023 = 0011 1111 1111
-                    // 127  = 0000 0111 1111 (subtract this)
-                    // 896  = 0011 1000 0000
-                    // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
-                    // dexp = 0bdd dbbb bbbb 
-                    // also need to take into account possible zero/denorm/inf/NaN values
-
-                    // convert the half precsion exponent into quad precsion
-                    XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF+1], XLen3[`H_NF]|XDenormE}; 
-                    YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF+1], YLen3[`H_NF]|YDenormE}; 
-                    ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF+1], ZLen3[`H_NF]|ZDenormE}; 
-
-                    // extract the fraction and add the nessesary trailing zeros
-                    XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
-                    YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
-                    ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
-                end
-            endcase
-        end
-
-    end
-
-    // is the exponent all 0's
-    assign XExpZero = ~XExpNonzero;
-    assign YExpZero = ~YExpNonzero;
-    assign ZExpZero = ~ZExpNonzero;
-
-    // is the fraction zero
-    assign XFracZero = ~|XFracE;
-    assign YFracZero = ~|YFracE;
-    assign ZFracZero = ~|ZFracE;
-
-    // add the assumed one (or zero if denormal or zero) to create the mantissa
-    assign XManE = {XExpNonzero, XFracE};
-    assign YManE = {YExpNonzero, YFracE};
-    assign ZManE = {ZExpNonzero, ZFracE};
-
-    // is X normalized
-    assign XNormE = ~(XExpMaxE|XExpZero);
-    
-    // is the input a NaN
-    //     - force to be a NaN if it isn't properly Nan Boxed
-    assign XNaNE = XExpMaxE & ~XFracZero;
-    assign YNaNE = YExpMaxE & ~YFracZero;
-    assign ZNaNE = ZExpMaxE & ~ZFracZero;
-
-    // is the input a singnaling NaN
-    assign XSNaNE = XNaNE&~XFracE[`NF-1];
-    assign YSNaNE = YNaNE&~YFracE[`NF-1];
-    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
-
-    // // is the input denormalized
-    // assign XDenormE = XExpZero & ~XFracZero;
-    // assign YDenormE = YExpZero & ~YFracZero;
-    // assign ZDenormE = ZExpZero & ~ZFracZero;
-
-    // is the input infinity
-    assign XInfE = XExpMaxE & XFracZero;
-    assign YInfE = YExpMaxE & YFracZero;
-    assign ZInfE = ZExpMaxE & ZFracZero;
-
-    // is the input zero
-    assign XZeroE = XExpZero & XFracZero;
-    assign YZeroE = YExpZero & YFracZero;
-    assign ZZeroE = ZExpZero & ZFracZero;
-    
+    unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), 
+                            .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
+                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
+
+    unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), 
+                            .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
+                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
+
+    unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), 
+                            .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
+                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
+    // is the input denormalized
+    assign XDenormE = ~XExpNonZero & ~XFracZero;
+    assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
 endmodule
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@ -0,0 +1,241 @@
+`include "wally-config.vh"
+
+module unpackinput ( 
+    input logic  [`FLEN-1:0]        In,    // inputs from register file
+    input logic  [`FPSIZES/3:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    output logic                    Sgn,    // sign bits of XYZ
+    output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)
+    output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
+    output logic                    NaN,    // is XYZ a NaN
+    output logic                    SNaN, // is XYZ a signaling NaN
+    output logic                    Zero,         // is XYZ zero
+    output logic                    Inf,            // is XYZ infinity
+    output logic                    ExpNonZero,            // is the exponent not zero
+    output logic                    FracZero,            // is the fraction zero
+    output logic                    ExpMax                       // does In have the maximum exponent (NaN or Inf)
+);
+ 
+    logic [`NF-1:0] Frac; //Fraction of XYZ
+    logic           ExpZero;
+    logic           BadNaNBox;
+    
+    if (`FPSIZES == 1) begin        // if there is only one floating point format supported
+        assign BadNaNBox = 0;
+        assign Sgn = In[`FLEN-1];  // sign bit
+        assign Frac = In[`NF-1:0];  // fraction (no assumed 1)
+        assign ExpNonZero = |In[`FLEN-2:`NF];  // is the exponent non-zero
+        assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};  // exponent.  Denormalized numbers have effective biased exponent of 1
+        assign ExpMax = &In[`FLEN-2:`NF];  // is the exponent all 1's
+    end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
+        //***need better names for these constants
+        // largest format | smaller format
+        //----------------------------------
+        //      `FLEN     |     `LEN1       length of floating point number
+        //      `NE       |     `NE1        length of exponent
+        //      `NF       |     `NF1        length of fraction
+        //      `BIAS     |     `BIAS1      exponent's bias value
+        //      `FMT      |     `FMT1       precision's format value - Q=11 D=01 S=00 H=10
+
+        // Possible combinantions specified by spec:
+        //      double and single
+        //      single and half
+
+        // Not needed but can also handle:
+        //      quad   and double
+        //      quad   and single
+        //      quad   and half
+        //      double and half
+
+        assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
+
+        // choose sign bit depending on format - 1=larger precsion 0=smaller precision
+        assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
+
+        // extract the fraction, add trailing zeroes to the mantissa if nessisary
+        assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
+
+        // is the exponent non-zero
+        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
+
+        // example double to single conversion:
+        // 1023 = 0011 1111 1111
+        // 127  = 0000 0111 1111 (subtract this)
+        // 896  = 0011 1000 0000
+        // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
+        // dexp = 0bdd dbbb bbbb 
+        // also need to take into account possible zero/denorm/inf/NaN values
+
+        // extract the exponent, converting the smaller exponent into the larger precision if nessisary
+        //      - if the original precision had a denormal number convert the exponent value 1
+        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+ 
+        // is the exponent all 1's
+        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
+    
+
+    end else if (`FPSIZES == 3) begin       // three floating point precsions supported
+
+        //***need better names for these constants
+        // largest format | larger format  | smallest format
+        //---------------------------------------------------
+        //      `FLEN     |     `LEN1      |    `LEN2       length of floating point number
+        //      `NE       |     `NE1       |    `NE2        length of exponent
+        //      `NF       |     `NF1       |    `NF2        length of fraction
+        //      `BIAS     |     `BIAS1     |    `BIAS2      exponent's bias value
+        //      `FMT      |     `FMT1      |    `FMT2       precision's format value - Q=11 D=01 S=00 H=10
+
+        // Possible combinantions specified by spec:
+        //      quad   and double and single
+        //      double and single and half
+
+        // Not needed but can also handle:
+        //      quad   and double and half
+        //      quad   and single and half
+
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                `FMT:  BadNaNBox = 0;
+                `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
+                `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
+                default: BadNaNBox = 0;
+            endcase
+
+        // extract the sign bit
+        always_comb
+            case (FmtE)
+                `FMT:  Sgn = In[`FLEN-1];
+                `FMT1: Sgn = In[`LEN1-1];
+                `FMT2: Sgn = In[`LEN2-1];
+                default: Sgn = 0;
+            endcase
+
+        // extract the fraction
+        always_comb
+            case (FmtE)
+                `FMT: Frac = In[`NF-1:0];
+                `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
+                `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
+                default: Frac = 0;
+            endcase
+
+        // is the exponent non-zero
+        always_comb
+            case (FmtE)
+                `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
+                `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
+                `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
+                default: ExpNonZero = 0; 
+            endcase
+            
+        // example double to single conversion:
+        // 1023 = 0011 1111 1111
+        // 127  = 0000 0111 1111 (subtract this)
+        // 896  = 0011 1000 0000
+        // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
+        // dexp = 0bdd dbbb bbbb 
+        // also need to take into account possible zero/denorm/inf/NaN values
+
+        // convert the larger precision's exponent to use the largest precision's bias
+        always_comb 
+            case (FmtE)
+                `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
+                `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+                `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
+                default: Exp = 0;
+            endcase
+
+        // is the exponent all 1's
+        always_comb
+            case (FmtE)
+                `FMT:  ExpMax = &In[`FLEN-2:`NF];
+                `FMT1: ExpMax = &In[`LEN1-2:`NF1];
+                `FMT2: ExpMax = &In[`LEN2-2:`NF2];
+                default: ExpMax = 0;
+            endcase
+
+    end else if (`FPSIZES == 4) begin      // if all precsisons are supported - quad, double, single, and half
+    
+        //    quad   |  double  |  single  |  half    
+        //-------------------------------------------------------------------
+        //   `Q_LEN  |  `D_LEN  |  `S_LEN  |  `H_LEN     length of floating point number
+        //   `Q_NE   |  `D_NE   |  `S_NE   |  `H_NE      length of exponent
+        //   `Q_NF   |  `D_NF   |  `S_NF   |  `H_NF      length of fraction
+        //   `Q_BIAS |  `D_BIAS |  `S_BIAS |  `H_BIAS    exponent's bias value
+        //   `Q_FMT  |  `D_FMT  |  `S_FMT  |  `H_FMT     precision's format value - Q=11 D=01 S=00 H=10
+
+        // Check NaN boxing
+        always_comb
+            case (FmtE)
+                2'b11:  BadNaNBox = 0;
+                2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
+                2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
+                2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
+            endcase
+
+        // extract sign bit
+        always_comb
+            case (FmtE)
+                2'b11: Sgn = In[`Q_LEN-1];
+                2'b01: Sgn = In[`D_LEN-1];
+                2'b00: Sgn = In[`S_LEN-1];
+                2'b10: Sgn = In[`H_LEN-1];
+            endcase
+            
+
+        // extract the fraction
+        always_comb
+            case (FmtE)
+                2'b11: Frac = In[`Q_NF-1:0];
+                2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
+                2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
+                2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
+            endcase
+
+        // is the exponent non-zero
+        always_comb
+            case (FmtE)
+                2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
+                2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
+                2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
+                2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; 
+            endcase
+
+
+        // example double to single conversion:
+        // 1023 = 0011 1111 1111
+        // 127  = 0000 0111 1111 (subtract this)
+        // 896  = 0011 1000 0000
+        // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
+        // dexp = 0bdd dbbb bbbb 
+        // also need to take into account possible zero/denorm/inf/NaN values
+        
+        // convert the double precsion exponent into quad precsion
+        always_comb
+            case (FmtE)
+                2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
+                2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
+                2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
+                2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; 
+            endcase
+
+
+        // is the exponent all 1's
+        always_comb 
+            case (FmtE)
+                2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
+                2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
+                2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
+                2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
+            endcase
+
+    end
+
+    // Output logic
+    assign FracZero = ~|Frac; // is the fraction zero?
+    assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
+    assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN?
+    assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
+    assign Inf = ExpMax & FracZero; // is the input infinity?
+    assign Zero = ~ExpNonZero & FracZero; // is the input zero?
+endmodule
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@ -1,14 +1,14 @@
 //leading zero counter i.e. priority encoder
 module lzc #(parameter WIDTH=1) (
    input logic  [WIDTH-1:0]            num,
-    output logic [$clog2(WIDTH)-1:0]  ZeroCnt
+    output logic [$clog2(WIDTH+1)-1:0]  ZeroCnt
 );
 /* verilator lint_off CMPCONST */
    
-    logic [$clog2(WIDTH)-1:0] i;
+    logic [$clog2(WIDTH+1)-1:0] i;
    always_comb begin
        i = 0;
-        while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH))'(WIDTH-1))) i = i+1;  // search for leading one
+        while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH+1))'(WIDTH-1))) i = i+1;  // search for leading one
        ZeroCnt = i;
    end
 /* verilator lint_on CMPCONST */
--- a/pipelined/src/privileged/privileged.sv
+++ b/pipelined/src/privileged/privileged.sv
@ -99,13 +99,14 @@ module privileged (
  logic       STATUS_MIE, STATUS_SIE;
  logic [11:0] MIP_REGW, MIE_REGW;
  logic [1:0] NextPrivilegeModeM;
+  logic       DelegateM;

  ///////////////////////////////////////////
  // track the current privilege level
  ///////////////////////////////////////////

-  privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .InterruptM, .CauseM, 
-                    .MEDELEG_REGW, .MIDELEG_REGW, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW);
+  privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM,
+                    .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW);

  ///////////////////////////////////////////
  // decode privileged instructions
@ -158,11 +159,11 @@ module privileged (
            .LoadPageFaultM, .StoreAmoPageFaultM,
            .mretM, .sretM, 
            .PrivilegeModeW, 
-            .MIP_REGW, .MIE_REGW, .MIDELEG_REGW,
+            .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW,
            .STATUS_MIE, .STATUS_SIE,
            .InstrValidM, .CommittedM,  
            .TrapM, .RetM,
-            .InterruptM, .IntPendingM,
+            .InterruptM, .IntPendingM, .DelegateM,
            .CauseM);
 endmodule

--- a/pipelined/src/privileged/privmode.sv
+++ b/pipelined/src/privileged/privmode.sv
@ -33,30 +33,22 @@

 module privmode (
  input  logic             clk, reset,
-  input  logic             StallW, TrapM, mretM, sretM, InterruptM,
-  input  logic [`LOG_XLEN-1:0] CauseM, 
-  input  logic [`XLEN-1:0] MEDELEG_REGW,
-  input  logic [11:0]      MIDELEG_REGW,
+  input  logic             StallW, TrapM, mretM, sretM,
+  input  logic             DelegateM,
  input  logic [1:0]       STATUS_MPP,
  input  logic             STATUS_SPP,
  output logic [1:0]       NextPrivilegeModeM, PrivilegeModeW
 ); 
  
  if (`U_SUPPORTED) begin:privmode
-    logic       md;
-
-    // get bits of DELEG registers based on CAUSE
-    assign md = InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM];
-    
    // PrivilegeMode FSM
    always_comb begin
      if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8)
-        if (`S_SUPPORTED & md & (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE))
-                          NextPrivilegeModeM = `S_MODE;
-        else              NextPrivilegeModeM = `M_MODE;
-      end else if (mretM) NextPrivilegeModeM = STATUS_MPP;
-      else if (sretM)     NextPrivilegeModeM = {1'b0, STATUS_SPP};
-      else                NextPrivilegeModeM = PrivilegeModeW;
+        if (`S_SUPPORTED & DelegateM) NextPrivilegeModeM = `S_MODE;
+        else                          NextPrivilegeModeM = `M_MODE;
+      end else if (mretM)             NextPrivilegeModeM = STATUS_MPP;
+      else if (sretM)                 NextPrivilegeModeM = {1'b0, STATUS_SPP};
+      else                            NextPrivilegeModeM = PrivilegeModeW;
    end

    flopenl #(2) privmodereg(clk, reset, ~StallW, NextPrivilegeModeM, `M_MODE, PrivilegeModeW);
--- a/pipelined/src/privileged/trap.sv
+++ b/pipelined/src/privileged/trap.sv
@ -39,11 +39,12 @@ module trap (
  (* mark_debug = "true" *) input logic 		   LoadPageFaultM, StoreAmoPageFaultM,
  (* mark_debug = "true" *) input logic 		   mretM, sretM, 
  input logic [1:0] 	   PrivilegeModeW, 
-  (* mark_debug = "true" *) input logic [11:0] 	   MIP_REGW, MIE_REGW, MIDELEG_REGW,
+  (* mark_debug = "true" *) input logic [11:0] 	   MIP_REGW, MIE_REGW, MIDELEG_REGW, 
+  input logic [`XLEN-1:0] MEDELEG_REGW,
  input logic 		   STATUS_MIE, STATUS_SIE,
  input logic 		   InstrValidM, CommittedM, 
  output logic 		   TrapM, RetM,
-  output logic 		   InterruptM, IntPendingM,
+  output logic 		   InterruptM, IntPendingM, DelegateM,
  output logic [`LOG_XLEN-1:0] CauseM 
 );

@ -63,6 +64,8 @@ module trap (
  assign IntPendingM = |PendingIntsM;
  assign ValidIntsM = {12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW; 
  assign InterruptM = (|ValidIntsM) && InstrValidM && ~(CommittedM);  // *** RT. CommittedM is a temporary hack to prevent integer division from having an interrupt during divide.
+  assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) & 
+                     (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE);

  ///////////////////////////////////////////
  // Trigger Traps and RET
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@ -44,7 +44,7 @@ module srt #(parameter Nf=52) (
  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
  input  logic       W64, // 32-bit ints on XLEN=64
  input  logic       Signed, // Interpret integers as signed 2's complement
-  input  logic       Int, // Choose integer inputss
+  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic       rsign,
  output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
@ -52,7 +52,7 @@ module srt #(parameter Nf=52) (
  output logic [3:0] Flags
 );

-  logic          qp, qz, qm; // quotient is +1, 0, or -1
+  logic           qp, qz, qm; // quotient is +1, 0, or -1
  logic [`NE-1:0] calcExp;
  logic           calcSign;
  logic [Nf-1:0]  X, Dpreproc;
@ -223,17 +223,17 @@ module otfc2 #(parameter N=52) (
  output logic [N-1:0] r
 );

-  // The on-the-fly converter transfers the quotient 
+  //  The on-the-fly converter transfers the quotient 
  //  bits to the quotient as they come. 
  //
-  // This code follows the psuedocode presented in the 
+  //  This code follows the psuedocode presented in the 
  //  floating point chapter of the book. Right now, 
  //  it is written for Radix-2 division.
  //
-  // QM is Q-1. It allows us to write negative bits 
+  //  QM is Q-1. It allows us to write negative bits 
  //  without using a costly CPA. 
  logic [N+2:0] Q, QM, QNext, QMNext;
-  // QR and QMR are the shifted versions of Q and QM.
+  //  QR and QMR are the shifted versions of Q and QM.
  //  They are treated as [N-1:r] size signals, and 
  //  discard the r most significant bits of Q and QM. 
  logic [N+1:0] QR, QMR;
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -79,7 +79,6 @@ module testbenchfp;
  logic [`NF:0]         FmaRuXMan, FmaRuYMan, FmaRuZMan;
  logic [`NF:0]         FmaRdXMan, FmaRdYMan, FmaRdZMan;
  logic [`NF:0]         FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
-  logic                 XNorm;                                // is X normal
  logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
  logic                 FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
  logic                 FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
@ -92,12 +91,12 @@ module testbenchfp;
  logic                 FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
  logic                 FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
  logic                 FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
-  logic                 XDenorm, YDenorm, ZDenorm;            // is the input denormalized
-  logic                 FmaRneXDenorm, FmaRneYDenorm, FmaRneZDenorm;
-  logic                 FmaRzXDenorm, FmaRzYDenorm, FmaRzZDenorm;
-  logic                 FmaRuXDenorm, FmaRuYDenorm, FmaRuZDenorm;
-  logic                 FmaRdXDenorm, FmaRdYDenorm, FmaRdZDenorm;
-  logic                 FmaRnmXDenorm, FmaRnmYDenorm, FmaRnmZDenorm;
+  logic                 XDenorm, ZDenorm;            // is the input denormalized
+  logic                 FmaRneXDenorm, FmaRneZDenorm;
+  logic                 FmaRzXDenorm, FmaRzZDenorm;
+  logic                 FmaRuXDenorm, FmaRuZDenorm;
+  logic                 FmaRdXDenorm, FmaRdZDenorm;
+  logic                 FmaRnmXDenorm, FmaRnmZDenorm;
  logic                 XInf, YInf, ZInf;                   // is the input infinity
  logic                 FmaRneXInf, FmaRneYInf, FmaRneZInf;
  logic                 FmaRzXInf, FmaRzYInf, FmaRzZInf;
@ -683,7 +682,7 @@ module testbenchfp;
                                    .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
                                    .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
-                                    .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), 
+                                    .XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm), 
                                    .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
                                    .XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
                                    .X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
@ -693,7 +692,7 @@ module testbenchfp;
                                    .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
                                    .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
-                                    .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), 
+                                    .XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm), 
                                    .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
                                    .XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
@ -703,7 +702,7 @@ module testbenchfp;
                                    .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
                                    .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
-                                    .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), 
+                                    .XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm), 
                                    .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
                                    .XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
@ -713,7 +712,7 @@ module testbenchfp;
                                    .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
                                    .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
-                                    .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), 
+                                    .XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm), 
                                    .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
                                    .XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
@ -723,7 +722,7 @@ module testbenchfp;
                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
                                    .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
                                    .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
-                                    .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), 
+                                    .XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm), 
                                    .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
                                    .XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
                                    .X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
@ -733,9 +732,9 @@ module testbenchfp;
                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
                                    .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                    .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
-                                    .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), 
+                                    .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
                                    .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf),.XNormE(XNorm), .XExpMaxE(XExpMax),
+                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax),
                                    .X, .Y, .Z);


@ -1294,13 +1293,13 @@ module readfmavectors (
  output logic [`NF:0]        XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
  output logic                XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
  output logic                XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                XDenormE, ZDenormE,   // is XYZ denormalized
  output logic                XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                XInfE, YInfE, ZInfE,            // is XYZ infinity
  output logic [`FLEN-1:0]    X, Y, Z                 // inputs
 );

-  logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA
+  logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
  // apply test vectors on rising edge of clk
  // Format of vectors Inputs(1/2/3)_AnsFlg
  always @(posedge clk) begin
@ -1335,7 +1334,7 @@ module readfmavectors (
  end
  
  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
                .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                .XExpMaxE, .ZDenormE);
 endmodule
@ -1373,10 +1372,10 @@ module readvectors (
  output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
  output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
  output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
+  output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
  output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XNormE, XExpMaxE,
+  output logic XExpMaxE,
  output logic [`FLEN-1:0] X, Y, Z
 );

@ -1660,7 +1659,7 @@ module readvectors (
  end
  
  unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
-                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
+                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
                .XExpMaxE);
 endmodule
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -119,6 +119,7 @@ logic [3:0] dummy;
        "wally32i":                       tests = wally32i; // *** redo
        "wally32e":                       tests = wally32e; 
        "wally32priv":                    tests = wally32priv; // *** redo
+        "embench":                        tests = embench;
      endcase
    end
    if (tests.size() == 0) begin
@ -127,7 +128,8 @@ logic [3:0] dummy;
    end
  end

-  string signame, memfilename, pathname;
+  string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
+  integer outputFilePointer;

  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
  logic UARTSin, UARTSout;
@ -212,70 +214,88 @@ logic [3:0] dummy;
          $display("Benchmark: coremark is done.");
          $stop;
        end
+      // Termination condition (i.e. we finished running current test) 
      if (DCacheFlushDone) begin
- 
-        #600; // give time for instructions in pipeline to finish
-        // clear signature to prevent contamination from previous tests
-        for(i=0; i<SIGNATURESIZE; i=i+1) begin
-          sig32[i] = 'bx;
-        end
-
-        // read signature, reformat in 64 bits if necessary
-        signame = {pathname, tests[test], ".signature.output"};
-        $readmemh(signame, sig32);
-        i = 0;
-        while (i < SIGNATURESIZE) begin
-          if (`XLEN == 32) begin
-            signature[i] = sig32[i];
-            i = i+1;
-          end else begin
-            signature[i/2] = {sig32[i+1], sig32[i]};
-            i = i + 2;
-          end
-          if (i >= 4 & sig32[i-4] === 'bx) begin
-            if (i == 4) begin
-              i = SIGNATURESIZE+1; // flag empty file
-              $display("  Error: empty test file");
-            end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
-          end
-        end
-
-        // Check errors
-        errors = (i == SIGNATURESIZE+1); // error if file is empty
-        i = 0;
+        // Gets the memory location of begin_signature
        testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
        testadrNoBase = (tests[test+1].atohex())/(`XLEN/8);
-        /* verilator lint_off INFINITELOOP */
-        while (signature[i] !== 'bx) begin
-          logic [`XLEN-1:0] sig;
-          if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
-          else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
-          //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
-          if (signature[i] !== sig &
-          //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
-	      (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
-            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
-              // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
-              // report errors unless they are garbage at the end of the sim
-              // kind of hacky test for garbage right now
-              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
-              errors = errors+1;
-              $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
-                    tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
-                    //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
-              $stop;//***debug
+        #600; // give time for instructions in pipeline to finish
+        if (TEST == "embench") begin
+          // Writes contents of begin_signature to .sim.output file
+          // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score
+          // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking
+          $display("Embench Benchmark: %s is done.", tests[test]);
+          outputfile = {pathname, tests[test], ".sim.output"};
+          outputFilePointer = $fopen(outputfile);
+          i = 0;
+          while ($unsigned(i) < $unsigned(5'd5)) begin
+            $fdisplayh(outputFilePointer, DCacheFlushFSM.ShadowRAM[testadr+i]);
+            i = i + 1;
+          end
+          $fclose(outputFilePointer);
+          $display("Embench Benchmark: created output file: %s", outputfile);
+        end else begin 
+          // for tests with no self checking mechanism, read .signature.output file and compare to check for errors
+          // clear signature to prevent contamination from previous tests
+          for(i=0; i<SIGNATURESIZE; i=i+1) begin
+            sig32[i] = 'bx;
+          end
+          // read signature, reformat in 64 bits if necessary
+          signame = {pathname, tests[test], ".signature.output"};
+          $readmemh(signame, sig32);
+          i = 0;
+          while (i < SIGNATURESIZE) begin
+            if (`XLEN == 32) begin
+              signature[i] = sig32[i];
+              i = i+1;
+            end else begin
+              signature[i/2] = {sig32[i+1], sig32[i]};
+              i = i + 2;
+            end
+            if (i >= 4 & sig32[i-4] === 'bx) begin
+              if (i == 4) begin
+                i = SIGNATURESIZE+1; // flag empty file
+                $display("  Error: empty test file");
+              end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
            end
          end
-          i = i + 1;
-        end
-        /* verilator lint_on INFINITELOOP */
-        if (errors == 0) begin
-          $display("%s succeeded.  Brilliant!!!", tests[test]);
-        end
-        else begin
-          $display("%s failed with %d errors. :(", tests[test], errors);
-          totalerrors = totalerrors+1;
+
+          // Check errors
+          errors = (i == SIGNATURESIZE+1); // error if file is empty
+          i = 0;
+          /* verilator lint_off INFINITELOOP */
+          while (signature[i] !== 'bx) begin
+            logic [`XLEN-1:0] sig;
+            if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i];
+            else                   sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i];
+            //$display("signature[%h] = %h sig = %h", i, signature[i], sig);
+            if (signature[i] !== sig &
+            //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
+            (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
+              if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
+                // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
+                // report errors unless they are garbage at the end of the sim
+                // kind of hacky test for garbage right now
+                $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
+                errors = errors+1;
+                $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
+                      tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
+                      //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]);
+                $stop;//***debug
+              end
+            end
+            i = i + 1;
+          end
+          /* verilator lint_on INFINITELOOP */
+          if (errors == 0) begin
+            $display("%s succeeded.  Brilliant!!!", tests[test]);
+          end
+          else begin
+            $display("%s failed with %d errors. :(", tests[test], errors);
+            totalerrors = totalerrors+1;
+          end
        end
+        // move onto the next test, check to see if we're done
        test = test + 2;
        if (test == tests.size()) begin
          if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
@ -283,6 +303,7 @@ logic [3:0] dummy;
          $stop;
        end
        else begin
+            // If there are still additional tests to run, read in information for the next test
            //pathname = tvpaths[tests[0]];
            memfilename = {pathname, tests[test], ".elf.memfile"};
            //$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@ -28,6 +28,7 @@
 `define WALLYTEST "2"
 `define MYIMPERASTEST   "3"
 `define COREMARK "4"
+`define EMBENCH "5"
 // *** remove MYIMPERASTEST cases when ported 

 string tvpaths[] = '{
@ -35,15 +36,44 @@ string tvpaths[] = '{
    "../../addins/riscv-arch-test/work/",
    "../../tests/wally-riscv-arch-test/work/",
    "../../tests/imperas-riscv-tests/work/",
-    "../../benchmarks/riscv-coremark/work/"
+    "../../benchmarks/riscv-coremark/work/",
+    "../../addins/embench-iot/bd_speed/src/"
 };

+
+
   // *** make sure these are somewhere
  string coremark[] = '{
    `COREMARK,
    "coremark.bare.riscv", "100000"
  };

+  string embench[] = '{
+    `EMBENCH,
+    "aha-mont64/aha-mont64", "1080",
+    "crc32/crc32", "1080",
+    "cubic/cubic", "9080",
+    "edn/edn", "1080",
+    "huffbench/huffbench", "5080",
+    "matmult-int/matmult-int", "1080",
+    "md5sum/md5sum", "4080",
+    "minver/minver", "2080",
+    "nbody/nbody", "2080",
+    "nettle-aes/nettle-aes", "1080",
+    "nettle-sha256/nettle-sha256", "2080",
+    "nsichneu/nsichneu", "4080",
+    "picojpeg/picojpeg", "3080",
+    "primecount/primecount", "1080",
+    "qrduino/qrduino", "6080",
+    "sglib-combined/sglib-combined", "5080",
+    "slre/slre", "1080",
+    "st/st", "2080",
+    "statemate/statemate", "2080",
+    "tarfind/tarfind", "4080",
+    "ud/ud", "1080",
+    "wikisort/wikisort", "3080"
+  };
+
  string wally64a[] = '{
    `WALLYTEST,
    "rv64i_m/privilege/WALLY-amo", "2210",