fixed merge conflicts

2025-02-11 06:05:49 +00:00 · 2022-05-28 09:44:55 +00:00 · 2022-05-28 09:44:55 +00:00 · 80315fedff
commit 80315fedff
parent 4335895b21 4ed7283ad1
29 changed files with 3717 additions and 2683 deletions
--- a/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
+++ b/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@ -1,15 +1,39 @@
 # Makefile added 1/20/22 David_Harris@hmc.edu
 # Compile Embench for Wally

-all: Makefile
-	../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cflags "-O2 -march=rv32i -mabi=ilp32 -mcmodel=medany" --cc riscv64-unknown-elf-gcc 
-	./benchmark_size.py 
-	./benchmark_speed.py
+all: build sim

-# view with
-# more `ls -t | head -1`
+allClean: clean all

+build: # compile Embench twice: bd_speed (timed runs, copied to *.elf) and bd_size (size measurement with dummy libs)
+	../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-nostartfiles" 
+	find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while IFS= read -r f; do cp "$$f" "$$f.elf" || exit 1; done
+	../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles" --cflags="-msave-restore" --dummy-libs="libgcc libm libc crt0"
+
+sim: modelSimBuild size speed
+
+modelSimBuild: objdump # convert each bd_speed *.elf to a 32-bit memfile, then run extractFunctionRadix.sh on each objdump; stop on the first failure
+	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while IFS= read -r f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile" || exit 1; done
+	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while IFS= read -r f; do extractFunctionRadix.sh "$$f" || exit 1; done
+
+size:
+	../../addins/embench-iot/benchmark_size.py --builddir=bd_size
+
+speed:
+	../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=50
+
+objdump: # disassemble (with interleaved source) every bd_speed *.elf into <name>.elf.objdump; stop on the first failure
+	find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while IFS= read -r f; do riscv64-unknown-elf-objdump -S "$$f" > "$$f.objdump" || exit 1; done
+
+clean: 
+	rm -rf ../../addins/embench-iot/bd_speed/
+	rm -rf ../../addins/embench-iot/bd_size/
+
+# std:
+# 	../../addins/embench-iot/build_all.py --builddir=bd_std --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-v -c -O2 -ffunction-sections -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -v -march=rv32imac -mabi=ilp32 ../../../../../benchmarks/embench/tohost.S -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --user-libs="-lm" 
+# 	riscv64-unknown-elf-objdump -D ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64 > ../../addins/embench-iot/bd_std/src/aha-mont64/aha-mont64.objdump
+# --dummy-libs="libgcc libm libc"
 # --cflags "-O2 -g -nostartfiles"
-
-
-#riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
+# ../../addins/embench-iot/build_all.py --arch riscv32 --chip generic --board rv32wallyverilog --cc riscv64-unknown-elf-gcc --cflags="-c -Os -ffunction-sections -nostdlib -march=rv32imac -mabi=ilp32" --ldflags="-Wl,-gc-sections -nostdlib -march=rv32imac -mabi=ilp32 -T../../../config/riscv32/boards/rv32wallyverilog/link.ld" --dummy-libs="libgcc libm libc"
+# --user-libs="-lm" 
+# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
--- a/benchmarks/embench/Makefile~
+++ b/benchmarks/embench/Makefile~
@ -1,7 +0,0 @@
-# Makefile added 1/20/22 David_Harris@hmc.edu
-# Compile Embench for Wally
-
-all: Makefile
-	./build_all.py --arch riscv32 --chip generic --board ri5cyverilator --cc riscv64-unknown-elf-gcc 
-	./benchmark_size.py 
-	./benchmark_speed.py
--- a/pipelined/regression/fp.do
+++ b/pipelined/regression/fp.do
@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 

 vsim -voptargs=+acc work.testbenchfp -G TEST=$2

--- a/pipelined/regression/regression-wally
+++ b/pipelined/regression/regression-wally
@ -46,7 +46,7 @@ configs = [
 ]
 def getBuildrootTC(short):
    INSTR_LIMIT = 4000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
-    MAX_EXPECTED = 246000000
+    MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
    if short:
        BRcmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0\n!"
        BRgrepstr=str(INSTR_LIMIT)+" instructions"
--- a/pipelined/regression/sim-wally
+++ b/pipelined/regression/sim-wally
@ -1,2 +1,2 @@
-vsim -do "do wally-pipelined.do rv64gc imperas64f"
+vsim -do "do wally-pipelined.do rv64gc imperas64d"

--- a/pipelined/regression/wally-pipelined-batch.do
+++ b/pipelined/regression/wally-pipelined-batch.do
@ -51,7 +51,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
    #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
    #vsim -coverage -lib work_$2 workopt_$2

-    # power add generates the logging necessary for saif generation.
+    # power add generates the logging necessary for SAIF generation.
    # power add -r /dut/core/*
    run -all
    # power off -r /dut/core/*
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -11,97 +11,97 @@ module fctrl (
  output logic       FDivStartD,  // Start division or squareroot
  output logic [1:0] FResultSelD, // select result to be written to fp register
  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
-  output logic [2:0] FResSelD,    // select one of the results done in the memory stage
+  output logic [1:0] FResSelD,    // select one of the results done in the memory stage
  output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
  output logic       FmtD,        // precision - single-0 double-1
  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
  output logic       FWriteIntD   // is the result written to the integer register
  );

-  `define FCTRLW 14
+  `define FCTRLW 13
  logic [`FCTRLW-1:0] ControlsD;
  // FPU Instruction Decoder
  always_comb
    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
-      ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1;
+      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
    else case(OpD)
    // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
      7'b0000111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b1_0_00_000_000_00_0_0; // flw
-                    3'b011:  ControlsD = `FCTRLW'b1_0_00_001_000_00_0_0; // fld
-                    default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
+                    3'b011:  ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
+                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                  endcase
      7'b0100111: case(Funct3D)
-                    3'b010:  ControlsD = `FCTRLW'b0_0_00_010_000_00_0_0; // fsw
-                    3'b011:  ControlsD = `FCTRLW'b0_0_00_011_000_00_0_0; // fsd
-                    default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                    3'b010:  ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
+                    3'b011:  ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
+                    default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                  endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_000_000_00_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_001_000_00_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_010_000_00_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_011_000_00_0_0; // fnmadd
+      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
+      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
+      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
+      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_000_00_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_000_00_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_000_00_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_000_00_1_0; // fdiv
-                    7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_000_00_1_0; // fsqrt
+                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
+                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
+                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
+                    7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
+                    7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
                    7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_000_001_00_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_001_001_00_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_11_010_001_00_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
+                                  3'b010:  ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_111_010_00_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_101_010_00_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  3'b000:  ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
+                                  3'b001:  ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_11_010_010_00_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_11_001_010_00_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_11_011_010_00_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  3'b010:  ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
+                                  3'b001:  ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
+                                  3'b000:  ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
-                    7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_000_10_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_000_01_0_0; // fmv.x.w
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_000_01_0_0; // fmv.x.d
-                                else                            ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
-                    7'b1101000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.s.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.s.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.s.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.s.lu
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                    7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
+                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
+                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
+                                else                            ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
+                    7'b1101000: case(Rs2D[1:0])//***reduce resSel
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w   w->s
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l   l->s
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.s
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.s
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.s
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.s
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s   s->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s  s->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s   s->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s  s->lu
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_000_00_0_0; // fmv.w.x
-                    7'b010000?: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.s.d
+                    7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
+                    7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
                    7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_000_011_00_0_0; // fcvt.d.w
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_010_011_00_0_0; // fcvt.d.wu
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_100_011_00_0_0; // fcvt.d.l
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_011_00_0_0; // fcvt.d.lu
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w   w->d
+                                  2'b01:    ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
+                                  2'b10:    ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l   l->d
+                                  2'b11:    ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
                    7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_011_11_0_0; // fcvt.w.d
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_011_011_11_0_0; // fcvt.wu.d
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_101_011_11_0_0; // fcvt.l.d
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_111_011_11_0_0; // fcvt.lu.d
-                                  default: ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+                                  2'b00:    ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d   d->w
+                                  2'b01:    ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d  d->wu
+                                  2'b10:    ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d   d->l
+                                  2'b11:    ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d  d->lu
+                                  default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                                endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_000_00_0_0; // fmv.d.x
-                    //7'b0100001: ControlsD = `FCTRLW'b1_0_11_000_100_00_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_00_000_100_00_0_1; // non-implemented instruction
+                    7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
+                    7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
+                    default:    ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
                  endcase
-      default:      ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1; // non-implemented instruction
+      default:      ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
    endcase

  // unswizzle control bits
@ -119,7 +119,7 @@ module fctrl (
  // Precision
  //    0-single
  //    1-double
-  assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : FResSelD == 3'b100 | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
+  assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];

  // FResultSel:
  //    000 - ReadRes - load
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -0,0 +1,803 @@
+
+`include "wally-config.vh"
+// largest length in IEU/FPU
+`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+
+module fcvt (
+    input logic             XSgnE,          // input's sign
+    input logic [`NE-1:0]   XExpE,          // input's exponent
+    input logic [`NF:0]     XManE,          // input's fraction
+    input logic [`XLEN-1:0] ForwardedSrcAE, // integer input - from IEU
+    input logic [2:0]       FOpCtrlE,       // choose which opperation (look below for values)
+    input logic             FWriteIntE,     // is fp->int (since it's writting to the integer register)
+    input logic             XZeroE,         // is the input zero
+    input logic             XDenormE,   // is the input denormalized
+    input logic             XInfE,          // is the input infinity
+    input logic             XNaNE,          // is the input a NaN
+    input logic             XSNaNE,         // is the input a signaling NaN
+    input logic [2:0]       FrmE,           // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic [`FPSIZES/3:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
+    output logic [`FLEN-1:0] CvtResE,       // the fp conversion result
+    output logic [`XLEN-1:0] CvtIntResE,    // the int conversion result
+    output logic [4:0]      CvtFlgE         // the conversion's flags
+    );
+
+    // OpCtrls:
+    //  fp->fp conversions: {0, output precision} - only one of the operations writes to the int register
+    //      half   - 10
+    //      single - 00
+    //      double - 01
+    //      quad   - 11
+    //  int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?}
+    //                            bit 2              bit 1                   bit 0
+    //      for example: signed long -> single floating point has the OpCode 111
+
+    // (FF) fp  -> fp conversion signals
+    // (IF) int -> fp conversion signals
+    // (FI) fp  -> int conversion signals
+
+
+    logic [`FPSIZES/3:0]    OutFmt;     // format of the output
+    logic [`XLEN-1:0]       PosInt;     // the positive integer input
+    logic [`XLEN-1:0]       TrimInt;    // integer trimmed to the correct size
+    logic [`LGLEN-1:0]      LzcIn;      // input to the Leading Zero Counter (priority encoder)
+    logic [`NE:0]           CalcExp;    // the calculated expoent
+	logic [$clog2(`LGLEN)-1:0] ShiftAmt;  // how much to shift by
+    logic [`LGLEN+`NF:0]    ShiftIn;    // number to be shifted
+    logic                   ResDenormUf;// does the result underflow or is denormalized
+    logic                   ResUf;      // does the result underflow
+    logic [`LGLEN+`NF:0]    Shifted;    // the shifted result
+    logic [`NE-2:0]         NewBias;    // the bias of the final result
+    logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
+    logic [`NE-1:0]	        OldExp;     // the old exponent
+    logic                   ResSgn;     // the result's sign
+    logic                   Sticky;     // sticky bit - for rounding
+    logic                   Round;      // round bit - for rounding
+    logic                   LSBFrac;    // the least significant bit of the fraction - for rounding
+    logic                   CalcPlus1;  // the calculated plus 1
+    logic                   Plus1;      // add one to the final result?
+    logic [`FLEN-1:0]       ShiftedPlus1;   // plus one shifted to the proper position
+    logic [`NE:0]           FullResExp; // the full result exponent (with the overflow bit) 
+    logic [`NE-1:0]         ResExp;     // the result's exponent (trimmed to the correct size)
+    logic [`NF-1:0]         ResFrac;    // the result's fraction
+    logic [`XLEN+1:0]       NegRes;     // the negation of the result
+    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
+    logic                   Overflow, Underflow, Inexact, Invalid; // flags
+    logic                   IntInexact, FpInexact, IntInvalid, FpInvalid;   // flags for FP and int outputs
+    logic [`NE-1:0]         MaxExp;         // the maximum exponent before overflow
+    logic [1:0]             NegResMSBS;     // the negitive integer result's most significant bits
+    logic [`FLEN-1:0]       NaNRes, InfRes, Res, UfRes; //various special results
+    logic                   KillRes;    // kill the result?
+    logic                   Signed;     // is the opperation with a signed integer?
+    logic                   Int64;      // is the integer 64 bits?
+    logic                   IntToFp;       // is the opperation an int->fp conversion?
+    logic                   ToInt;      // is the opperation an fp->int conversion?
+    logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC
+
+
+    // seperate OpCtrl for code readability
+    assign Signed = FOpCtrlE[0];
+    assign Int64 =  FOpCtrlE[1];
+    assign IntToFp =   FOpCtrlE[2];
+    assign ToInt =  FWriteIntE;
+
+    // choose the output format depending on the operation
+    //      - fp -> fp: OpCtrl contains the precision of the output
+    //      - int -> fp: FmtE contains the precision of the output
+    if (`FPSIZES == 2) 
+        assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT); 
+    else if (`FPSIZES == 3 | `FPSIZES == 4) 
+        assign OutFmt = IntToFp ? FmtE : FOpCtrlE[1:0]; 
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // negation
+    ///////////////////////////////////////////////////////////////////////////
+    // 1) negate the input if the input is a negative signed integer
+    // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
+
+    assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
+    assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
+
+    ///////////////////////////////////////////////////////////////////////////
+    // lzc 
+    ///////////////////////////////////////////////////////////////////////////
+    
+    // choose the input to the leading zero counter i.e. priority encoder
+    //             int -> fp : | positive integer | 00000... (if needed) | 
+    //             fp  -> fp : | fraction         | 00000... (if needed) | 
+    assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
+                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
+    
+    lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // shifter
+    ///////////////////////////////////////////////////////////////////////////
+
+    // select the input to the shifter
+    //      fp  -> int:
+    //          |  `XLEN  zeros |     Mantissa      | 0's if necessary |
+    //          Other problems:
+    //              - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
+    //              - we do however want to keep the one in the sticky bit so set one of the bits in the sticky bit area to 1
+    //                  - ex: for the case 0010000.... (double)
+    //      ??? -> fp:
+    //          - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | 
+    //          - otherwise:
+    //              |     lzcIn      | 0's if necessary | 
+    assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
+                     ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} : 
+                                   {LzcIn, {`NF+1{1'b0}}};
+    // kill the shift if it's negative
+    // select the amount to shift by
+    //      fp -> int: 
+    //          - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
+    //              - don't shift if supposed to shift right (underflowed or denorm input)
+    //      denormalized/underflowed result fp -> fp:
+    //          - shift left by NF-1+CalcExp - to shift till the biased exponent is 0
+    //      ??? -> fp: 
+    //          - shift left by ZeroCnt+1 - to shift till the result is normalized
+    //              - only shift fp -> fp if the initial value is denormalized
+    //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
+    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
+    assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} :
+                    ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] : 
+                              (ZeroCnt+1)&{$clog2(`LGLEN){XDenormE|IntToFp}};
+    
+    // shift
+    //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if nessisary | << CalcExp
+    //          process:
+    //              - start - CalcExp = 1 + XExp - Largest Bias
+    //                  |  `XLEN  zeros     |     Mantissa      | 0's if nessisary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac      | 0's if nessisary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if nessisary     |
+    //                  |     keep          |
+    //
+    //      fp -> fp:
+    //          - if result is denormalized or underflowed:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | << NF+CalcExp-1
+    //          process:
+    //             - start
+    //                 |     mantissa      | 0's |
+    //
+    //             - shift right by NF-1 (NF-1)
+    //                 |  `NF-1  zeros   |     mantissa      | 0's |
+    //
+    //             - shift left by CalcExp = XExp - Largest bias + new bias
+    //                 |   0's  |     mantissa      |     0's      |
+    //                 |       keep      |
+    //
+    //          - if the input is denormalized:
+    //              |     lzcIn      | 0's if nessisary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+    //
+    //      int -> fp: |     lzcIn      | 0's if nessisary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+
+    assign Shifted = ShiftIn << ShiftAmt;
+
+    ///////////////////////////////////////////////////////////////////////////
+    // exp calculations
+    ///////////////////////////////////////////////////////////////////////////
+
+
+    // *** possible optimizations:
+        //  - if subtracting exp by bias only the msb needs a full adder, the rest can be HA - dunno how to implement this for synth
+        //  - Smaller exp -> Larger Exp can be calculated with: *** can use in Other units??? FMA??? insert this thing in later
+        //          Exp if in range: {~Exp[SNE-1], Exp[SNE-2:0]}
+        //          Exp in range if: Exp[SNE-1] = 1 & Exp[LNE-2:SNE] = 1111... & Exp[LNE-1] = 0 | Exp[SNE-1] = 0 & Exp[LNE-2:SNE] = 000... & Exp[LNE-1] = 1
+        //                     i.e.: &Exp[LNE-2:SNE-1] xor Exp[LNE-1]
+        //          Too big if:      Exp[LNE-1] = 1
+        //          Too small if:    none of the above
+
+    // Select the bias of the output (NewBias is `NE-1 bits wide)
+    //      fp -> int : select 1
+    //      ??? -> fp : pick the new bias depending on the output format (OutFmt)
+    if (`FPSIZES == 1) begin
+        assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); 
+
+    end else if (`FPSIZES == 2) begin
+        assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+
+    end else if (`FPSIZES == 3) begin
+        logic [`NE-2:0] NewBiasToFp;
+        always_comb
+            case (OutFmt)
+                `FMT: NewBiasToFp =  (`NE-1)'(`BIAS);
+                `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
+                `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
+                default: NewBiasToFp = 'x; // unsupported format: x on every bit (1'bx would zero-extend and only x the lsb)
+            endcase
+        assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
+
+    end else if (`FPSIZES == 4) begin        
+        logic [`NE-2:0] NewBiasToFp;
+        always_comb
+            case (OutFmt) // all four 2-bit encodings are listed, so no default arm is needed
+                2'h3: NewBiasToFp =  (`NE-1)'(`Q_BIAS);
+                2'h1: NewBiasToFp =  (`NE-1)'(`D_BIAS);
+                2'h0: NewBiasToFp =  (`NE-1)'(`S_BIAS);
+                2'h2: NewBiasToFp =  (`NE-1)'(`H_BIAS);
+            endcase
+        assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
+    end
+    // select the old exponent
+    //      int -> fp : largest bias + XLEN
+    //      fp -> ??? : XExp
+    assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : XExpE;
+    
+    // calculate CalcExp
+    //      fp -> fp : 
+    //          - XExp - Largest bias + new bias - (ZeroCnt+1)
+    //                                          only do ^ if the input was denormalized
+    //              - convert the exponent to the final precision (Exp - oldBias + newBias)
+    //              - correct the exponent when there is a normalization shift ( - (ZeroCnt+1)) 
+    //      fp -> int : XExp - Largest Bias + 1 - (ZeroCnt+1)
+    //          |  `XLEN  zeros |     Mantissa      | 0's if necessary | << CalcExp
+    //          process:
+    //              - start
+    //                  |  `XLEN  zeros     |     Mantissa      | 0's if necessary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac      | 0's if necessary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if necessary     |
+    //                  |     keep        |
+    //
+    //              - if the input is denormalized then we dont shift... so the  "- (ZeroCnt+1)" is just leftovers from other options
+    //      int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt
+    //              Process:
+    //                  - shifted right by XLEN (XLEN)
+    //                  - shift left to normalize (-1-ZeroCnt)
+    //                  - newBias to make the biased exponent
+    //      the "1" and the ZeroCnt terms are only subtracted when XDenormE|IntToFp is set; bit `NE of CalcExp is tested below as the "negative" indicator
+    assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XDenormE|IntToFp}})};
+    // find if the result is denormal or underflows
+    //      - if Calculated exponent is 0 or negative (and the input/result is not exactly 0)
+    //      - can't underflow an integer to Fp conversion
+    assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
+    // choose the negative of the fraction size of the output format (compared against CalcExp to detect underflow in ResUf)
+    if (`FPSIZES == 1) begin
+        assign ResNegNF = -`NF; 
+
+    end else if (`FPSIZES == 2) begin
+        assign ResNegNF = OutFmt ? -`NF : -`NF1;
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT:  ResNegNF = -`NF;
+                `FMT1: ResNegNF = -`NF1;
+                `FMT2: ResNegNF = -`NF2;
+                default: ResNegNF = 'x; // unsupported format: x on every bit (1'bx would zero-extend and only x the lsb)
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (OutFmt) // all four 2-bit encodings are listed, so no default arm is needed
+                2'h3: ResNegNF = -`Q_NF;
+                2'h1: ResNegNF = -`D_NF;
+                2'h0: ResNegNF = -`S_NF;
+                2'h2: ResNegNF = -`H_NF;
+            endcase
+    end
+    // determine if the result underflows ??? -> fp
+    //      - if the first 1 is shifted out of the result then the result underflows (signed compare: CalcExp < ResNegNF extended with 1's)
+    //      - can't underflow an integer to fp conversion, and a zero fp input (XZeroE) is not an underflow
+    assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
+
+    
+    ///////////////////////////////////////////////////////////////////////////
+    // sign
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Sign of the result:
+    //      - int -> fp : negative only for a signed conversion whose integer msb is set
+    //          - Int64 set   : the msb is bit `XLEN-1 of the integer input
+    //          - Int64 clear : the msb is bit 31 of the integer input
+    //      - otherwise: the floating point input's sign
+    assign ResSgn = IntToFp ? (Signed & (Int64 ? ForwardedSrcAE[`XLEN-1] : ForwardedSrcAE[31])) : XSgnE;
+
+    ///////////////////////////////////////////////////////////////////////////
+    // rounding
+    ///////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    //      {Round, Sticky}
+    //      0x - do nothing
+    //      10 - tie - Plus1 if result is odd  (LSBFrac = 1)
+    //      11 - Plus1
+
+    //  round to zero - do nothing
+
+    //  round to -infinity - Plus1 if negative (and not exact)
+
+    //  round to infinity - Plus1 if positive (and not exact)
+
+    //  round to nearest max magnitude
+    //      {Round, Sticky}
+    //      0x - do nothing
+    //      1x - Plus1
+    // Pick the rounding bits out of Shifted for the selected result width W
+    // (W = `XLEN for fp -> int, or the fraction size of the output format for ??? -> fp):
+    //      LSBFrac = Shifted[`LGLEN+`NF-W+1]       lowest bit that is kept
+    //      Round   = Shifted[`LGLEN+`NF-W]         first bit below the kept bits
+    //      Sticky  = |Shifted[`LGLEN+`NF-W-1:0]    OR of everything below the round bit
+    // ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
+    if (`FPSIZES == 1) begin
+        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
+        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
+        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
+
+    end else if (`FPSIZES == 2) begin    
+        assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : 
+                        (OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
+        assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : 
+                        OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
+        assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : 
+                        OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
+
+    end else if (`FPSIZES == 3) begin
+        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
+        always_comb
+            case (OutFmt)
+                `FMT:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`NF-`NF];
+                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
+                end
+                `FMT1:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`NF-`NF1];
+                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
+                end
+                `FMT2:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`NF-`NF2];
+                     ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
+                end
+                default:  begin 
+                     ToFpSticky = 1'bx;
+                     ToFpRound = 1'bx;
+                     ToFpLSBFrac = 1'bx;
+                end
+            endcase
+            // the fp -> int selection is independent of OutFmt; ResUf only feeds the fp sticky bit
+            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
+            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
+            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
+
+    end else if (`FPSIZES == 4) begin        
+        logic ToFpSticky, ToFpRound, ToFpLSBFrac;
+        always_comb
+            case (OutFmt)
+                2'h3:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`Q_NF];
+                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
+                end
+                2'h1:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`D_NF];
+                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
+                end
+                2'h0:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`S_NF];
+                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
+                end
+                2'h2:  begin 
+                     ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
+                     ToFpRound =   Shifted[`LGLEN+`Q_NF-`H_NF];
+                     ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
+                end
+            endcase
+            // the fp -> int selection is independent of OutFmt; ResUf only feeds the fp sticky bit
+            assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
+            assign Round =  ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
+            assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
+    end
+
+    always_comb
+        // Determine if you add 1 (to the magnitude of the result) for each rounding mode in FrmE
+        case (FrmE)
+            3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even: above half, or exactly half with an odd lsb
+            3'b001: CalcPlus1 = 0;//round to zero: always truncate
+            3'b010: CalcPlus1 = ResSgn;//round down: grow the magnitude of negative results
+            3'b011: CalcPlus1 = ~ResSgn;//round up: grow the magnitude of positive results
+            3'b100: CalcPlus1 = Round;//round to nearest max magnitude: half or above
+            default: CalcPlus1 = 1'bx;//any other FrmE encoding is not handled here
+        endcase
+
+    // dont round if exact (no discarded bit is set)
+    assign Plus1 = CalcPlus1&(Round|Sticky);
+
+    // shift the 1 to the proper position for rounding
+    //     - the increment is lined up with the lsb of the selected output format's fraction inside {FullResExp, ResFrac}
+    //     - dont round here if converting to integer (the integer path adds Plus1 when forming NegRes)
+    if (`FPSIZES == 1) begin
+        assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
+
+    end else if (`FPSIZES == 2) begin
+        assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT:  ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
+                `FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
+                `FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
+                default: ShiftedPlus1 = 0; // unsupported format: no rounding increment
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (OutFmt)
+                2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
+                2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
+                2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
+                2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
+            endcase
+    end
+    // kill calcExp (force it to 0) if the result is denormalized or underflows, then add the rounding increment; the exponent sits directly above the fraction so a carry out of the fraction increments the exponent
+    assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
+    // trim the result's exponent to size (drop bit `NE)
+    assign ResExp = FullResExp[`NE-1:0];
+    ///////////////////////////////////////////////////////////////////////////
+    // flags
+    ///////////////////////////////////////////////////////////////////////////
+    
+    // calculate the flags
+
+    // find the maximum exponent (the exponent and larger overflows)
+    //      - fp -> int : 65 when Int64 is set, 33 otherwise
+    //      - ??? -> fp : all ones in the exponent field of the output format
+    if (`FPSIZES == 1) begin
+        assign MaxExp = ToInt ? Int64 ? 65 : 33 : {`NE{1'b1}};
+
+    end else if (`FPSIZES == 2) begin    
+        assign MaxExp = ToInt ? Int64 ? 65 : 33 :
+                OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
+
+    end else if (`FPSIZES == 3) begin
+        logic [`NE-1:0] MaxExpFp;
+        always_comb
+            case (OutFmt)
+                `FMT:  begin 
+                     MaxExpFp = {`NE{1'b1}};
+                end
+                `FMT1:  begin 
+                     MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
+                end
+                `FMT2:  begin 
+                     MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
+                end
+                default:  begin 
+                     // unsupported format: x on every bit (1'bx would zero-extend and only x the lsb)
+                     MaxExpFp = 'x;
+                end
+            endcase
+        assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
+
+    end else if (`FPSIZES == 4) begin        
+        logic [`NE-1:0] MaxExpFp;
+        always_comb
+            case (OutFmt) // all four 2-bit encodings are listed, so no default arm is needed
+                2'h3:  begin 
+                     MaxExpFp = {`Q_NE{1'b1}};
+                end
+                2'h1:  begin 
+                     MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
+                end
+                2'h0:  begin 
+                     MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
+                end
+                2'h2:  begin 
+                     MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
+                end
+            endcase
+        assign MaxExp = ToInt ? Int64 ? 65 : 33 : MaxExpFp;
+    end
+
+    //                 if the result exponent is larger than the maximum possible exponent
+    //                 |                  and the exponent is positive
+    //                 |                  |             and the input is not NaN or Infinity
+    //                 |                  |             |
+    assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
+
+    //                 if the result is denormalized or underflowed
+    //                 |             and the result did not round into normal values
+    //                 |             |                             and the result is not exact
+    //                 |             |                             |              and the result isn't NaN
+    //                 |             |                             |              |
+    assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
+
+    // we are using the IEEE convertToIntegerExact operations (rather than the non-exact ones) which do signal the inexact flag
+    //                  if there were bits thrown away
+    //                  |            if overflowed or underflowed
+    //                  |            |                    and if not a NaN
+    //                  |            |                    |
+    assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
+
+    //                  if the result is too small to be represented and not 0
+    //                  |                                     and if the result is not invalid (outside the integer bounds)
+    //                  |                                     |
+    assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
+
+    // select the inexact flag to output
+    assign Inexact = ToInt ? IntInexact : FpInexact;
+
+    //                  if an input was a signaling NaN(and we're using a FP input)
+    //                  |
+    assign FpInvalid = (XSNaNE&~IntToFp);
+
+    // two adjacent upper bits of the rounded integer result (position depends on Signed and Int64); IntInvalid flags the result when they differ
+    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
+			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
+    //                  if the input is NaN or infinity
+    //                  |           if the integer result overflows (out of range) 
+    //                  |           |         if the input was negative but outputting to an unsigned number
+    //                  |           |         |                    the result doesn't round to zero
+    //                  |           |         |                    |               or the result rounds up out of bounds
+    //                  |           |         |                    |                       and the result didn't underflow
+    //                  |           |         |                    |                       |
+    assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
+    //                                                                                                     |
+    //                                                                                                     or when the positive result rounds up out of range
+    // select the invalid flag to output
+    assign Invalid = ToInt ? IntInvalid : FpInvalid;
+    // pack the flags together
+    //      - fp -> int does not set the overflow or underflow flags
+    //      - the constant 0 sits between invalid and overflow (the divide-by-zero slot, assuming CvtFlgE follows the RISC-V fflags order)
+    assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
+
+
+    ///////////////////////////////////////////////////////////////////////////
+    // result selection
+    ///////////////////////////////////////////////////////////////////////////
+
+    // Decide whether the normal result is replaced by the killed (rounded zero) result:
+    //      - always when the result underflows (ResUf)
+    //      - int -> fp : when the integer input (TrimInt) is 0
+    //      - otherwise : when the fp input is zero (the exp doesnt calculate correctly for a zero input)
+    assign KillRes = ResUf | (IntToFp & ~(|TrimInt)) | (~IntToFp & XZeroE);
+
+    if (`FPSIZES == 1) begin        
+        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+        //      - sign 0, exponent and the top fraction bit all 1's, then either the input's low mantissa bits or 0's
+        if(`IEEE754) begin
+            assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
+        end else begin 
+            assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
+        end
+        // determine the infinity result
+        //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+        //      - otherwise: output infinity with the correct sign
+        //      - ignore the infinity signal (XInfE) if the input isn't fp
+        assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+
+        // result for when the result is killed i.e. underflows
+        //      - output a rounded 0 with the correct sign (the lsb is set when Plus1 is set and FrmE[1] is set)
+        assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
+
+        // format the result - a single format fills the whole result, so there are no unused msbs to NaN box
+        assign Res   = {ResSgn, ResExp, ResFrac};
+
+
+    end else if (`FPSIZES == 2) begin
+        // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+        //      - OutFmt set selects the full-width layout, OutFmt clear selects the smaller format with 1's in the unused msbs
+        if(`IEEE754) begin
+            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
+        end else begin 
+            assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
+        end
+        // determine the infinity result
+        //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+        //      - otherwise: output infinity with the correct sign
+        //      - ignore the infinity signal (XInfE) if the input isn't fp
+        assign InfRes =  OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                                        {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                                 (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
+                                                                                                                                        {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+
+        // result for when the result is killed i.e. underflows
+        //      - output a rounded 0 with the correct sign (the lsb is set when Plus1 is set and FrmE[1] is set)
+        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
+
+        // format the result - NaN box the smaller format (put 1's in the unused msbs)
+        assign Res   = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin
+        // Build the four candidate results (NaNRes, InfRes, UfRes, Res) in the layout of the selected output format.
+        // The smaller formats (`FMT1, `FMT2) are NaN boxed: the unused msbs are filled with 1's.
+        always_comb
+            case (OutFmt)
+                `FMT: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
+                    end else begin 
+                        NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - the largest format fills the whole result, so nothing is NaN boxed
+                    Res = {ResSgn, ResExp, ResFrac};
+                end
+                `FMT1: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
+                    end else begin 
+                        NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - NaN box the smaller format (put 1's in the unused msbs)
+                    Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
+                    end else begin 
+                        NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - NaN box the smaller format (put 1's in the unused msbs)
+                    Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
+                end
+                default: begin
+                    // unsupported format: x on every bit ('x fills the full width; 1'bx would zero-extend and only x the lsb)
+                    NaNRes = 'x;
+                    InfRes = 'x;
+                    UfRes  = 'x;
+                    Res    = 'x;
+                end
+            endcase
+    end else if (`FPSIZES == 4) begin        
+        // Build the four candidate results (NaNRes, InfRes, UfRes, Res) in the layout of the selected output format.
+        // Every format narrower than `Q_LEN is NaN boxed: the unused msbs are filled with 1's.
+        always_comb
+            case (OutFmt)
+                2'h3: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
+                    end else begin 
+                        NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - the largest format fills the whole result, so nothing is NaN boxed
+                    Res = {ResSgn, ResExp, ResFrac};
+                end
+                2'h1: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
+                    end else begin 
+                        NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - NaN box the smaller format (put 1's in the unused msbs)
+                    Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
+                end
+                2'h0: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
+                    end else begin 
+                        NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - NaN box the smaller format (put 1's in the unused msbs)
+                    Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
+                end
+                2'h2: begin
+                    // IEEE sends a payload while Riscv says to send a canonical quiet NaN
+                    if(`IEEE754) begin
+                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
+                    end else begin 
+                        NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
+                    end
+                    // determine the infinity result
+                    //      - if the result overflowed (the input was not a fp infinity) and the rounding mode rounds this sign toward zero (RZ, RD for a positive result, RU for a negative result) then output the maximum normalized floating point number with the correct sign
+                    //      - otherwise: output infinity with the correct sign
+                    //      - ignore the infinity signal (XInfE) if the input isn't fp
+                    InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
+
+                    // result for when the result is killed i.e. underflows
+                    //      - output a rounded 0 with the correct sign
+                    UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
+
+                    // format the result - NaN box the smaller format (put 1's in the unused msbs)
+                    Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
+                end
+            endcase
+    end
+
+    
+    // choose the floating point result (the first matching condition wins)
+    //      - if the input is NaN (and using the NaN input) output the NaN result
+    //      - if the input is infinity or the output overflows output the infinity result
+    //      - kill the InfE signal if the input isn't a floating point value
+    //      - if killing the result output the underflow result
+    //      - otherwise output the normal result
+    assign CvtResE = XNaNE&~IntToFp ? NaNRes : 
+                     (XInfE&~IntToFp)|Overflow ? InfRes :
+                     KillRes ? UfRes :
+                     Res;
+    // *** probably can optimize the negation
+    // select the overflow integer result
+    //      - negative infinity and out of range negative input
+    //                 |  int  |  long  |
+    //          signed | -2^31 | -2^63  |
+    //        unsigned |   0   |    0   |
+    //
+    //      - positive infinity and out of range positive input and NaNs
+    //                 |   int  |  long  |
+    //          signed | 2^31-1 | 2^63-1 |
+    //        unsigned | 2^32-1 | 2^64-1 |
+    //
+    //      other: 32 bit unsigned result should be sign extended as if it were a signed number
+    assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
+                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
+                               XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negative
+                                              {`XLEN{1'b1}};// unsigned positive
+    
+    // round (add Plus1 to the top `XLEN bits of Shifted, with two extra msbs to catch the carry) and negate the positive result if needed
+    assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
+    // select the integer output
+    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow result
+    //      - if the input underflows
+    //          - if rounding and signed operation and negative input, output -1
+    //          - otherwise output a rounded 0
+    //      - otherwise output the normal result (trimmed and sign extended if necessary)
+    assign CvtIntResE = Invalid ?  OfIntRes :
+			            CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
+                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
+
+endmodule
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -43,8 +43,7 @@ module fma(
    input logic                 XSgnM, YSgnM,           // input signs - memory stage
    input logic [`NE-1:0]       ZExpM,    // input exponents - memory stage
    input logic [`NF:0]         XManM, YManM, ZManM,    // input mantissa - memory stage
-    input logic                 ZOrigDenormE, // is the original precision denormalized
-    input logic                 XDenormE, YDenormE, ZDenormE, // is denorm
+    input logic                 ZDenormE, // is denorm
    input logic                 XZeroE, YZeroE, ZZeroE,     // is zero - execute stage
    input logic                 XNaNM, YNaNM, ZNaNM,        // is NaN
    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
@ -73,10 +72,10 @@ module fma(
    logic 			    PSgnE, PSgnM;
    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
    logic               Mult;
-    logic               ZOrigDenormM;
+    logic               ZDenormM;
    
    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-                .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
+                .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
                
@ -84,10 +83,10 @@ module fma(
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
    flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE},
-                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM});
+                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
+                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});

-    fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
+    fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
            .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
            .FMAResM, .FMAFlgM);
@ -101,7 +100,6 @@ module fma1(
    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,    // biased exponents in B(NE.0) format
    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
-    input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic  [`FPSIZES/3:0] FmtE,       // precision 1 = double 0 = single
@ -116,13 +114,11 @@ module fma1(
    output logic [$clog2(3*`NF+7)-1:0]          NormCntE        // normalization shift cnt
    );

-    logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
    logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
    logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
    logic [3*`NF+6:0]   AlignedAddendInv;   // aligned addend possibly inverted
    logic [2*`NF+1:0]   ProdManKilled;      // the product's mantissa possibly killed
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
-    logic [`NE-1:0]     XExpVal, YExpVal;   // exponent value after taking into accound denormals
    ///////////////////////////////////////////////////////////////////////////////
    // Calculate the product
    //      - When multiplying two fp numbers, add the exponents
@ -133,8 +129,8 @@ module fma1(
   

   // calculate the product's exponent 
-    expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal, 
-                    .Denorm, .ProdExpE);
+    expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE,
+                 .ProdExpE);

    // multiplication of the mantissas
    mult mult(.XManE, .YManE, .ProdManE);
@ -143,7 +139,7 @@ module fma1(
    // Alignment shifter
    ///////////////////////////////////////////////////////////////////////////////

-    align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal,
+    align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE,
                        .AlignedAddendE, .AddendStickyE, .KillProdE);
                        
    // calculate the signs and take the operation into account
@ -167,51 +163,12 @@ endmodule
 module expadd(    
    input  logic [`FPSIZES/3:0] FmtE,          // precision
    input  logic [`NE-1:0]      XExpE, YExpE,  // input exponents
-    input  logic                XDenormE, YDenormE,    // are the inputs denormalized
    input  logic                XZeroE, YZeroE,        // are the inputs zero
-    output logic [`NE-1:0]      XExpVal, YExpVal,      // Exponent value after taking into account denormals
-    output logic [`NE-1:0]      Denorm,        // value of denormalized exponent
    output logic [`NE+1:0]      ProdExpE       // product's exponent B^(1023)NE+2
 );

-
-    // denormalized numbers have diffrent values depending on which precison it is.
-    //      FLEN - 1
-    //      Other - BIAS - other bias + 1
-    
-    if (`FPSIZES == 1) begin
-        assign Denorm = 1;
-
-    end else if (`FPSIZES == 2) begin
-        assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
-
-    end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (FmtE)
-                `FMT: Denorm = 1;
-                `FMT1: Denorm = `BIAS-`BIAS1+1;
-                `FMT2: Denorm = `BIAS-`BIAS2+1;
-                default: Denorm = 1'bx;
-            endcase
-        end
-
-    end else if (`FPSIZES == 4) begin
-        always_comb begin
-            case (FmtE)
-                2'h3: Denorm = 1;
-                2'h1: Denorm = `BIAS-`D_BIAS+1;
-                2'h0: Denorm = `BIAS-`S_BIAS+1;
-                2'h2: Denorm = `BIAS-`H_BIAS+1;
-            endcase
-        end
-
-    end
-
-    // pick denormalized value or exponent
-    assign XExpVal = XDenormE ? Denorm : XExpE;
-    assign YExpVal = YDenormE ? Denorm : YExpE;
    // kill the exponent if the product is zero - either X or Y is 0
-    assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
+    assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};

 endmodule

@ -258,13 +215,10 @@ endmodule


 module align(
-    input logic  [`NE-1:0]      ZExpE,      // biased exponents in B(NE.0) format
+    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,      // biased exponents in B(NE.0) format
    input logic  [`NF:0]        ZManE,      // fractions in U(0.NF) format]
-    input logic                 ZDenormE,   // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic  [`NE-1:0]      XExpVal, YExpVal,       // Exponent value after taking into account denormals
    input logic  [`NE+1:0]      ProdExpE,       // the product's exponent
-    input logic  [`NE-1:0]      Denorm,         // the biased value of a denormalized number
    output logic [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
    output logic                AddendStickyE,  // Sticky bit calculated from the aliged addend
    output logic                KillProdE       // should the product be set to zero
@ -273,7 +227,6 @@ module align(
    logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
    logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
-    logic [`NE-1:0]     ZExpVal;            // Exponent value after taking into account denormals

    ///////////////////////////////////////////////////////////////////////////////
    // Alignment shifter
@ -282,11 +235,9 @@ module align(
    // determine the shift count for alignment
    //      - negative means Z is larger, so shift Z left
    //      - positive means the product is larger, so shift Z right
-    //      - Denormal numbers have a diffrent exponent value depending on the precision
-    assign ZExpVal = ZDenormE ? Denorm : ZExpE;
-    // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
    // *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22
-    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
+    //      KP- yes we used ProdExpE originally but we did this for timing
+    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpE};

    // Default Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
@ -409,22 +360,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098



-    lzc lzc(.f, .NormCntE);
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
  
 endmodule

-module lzc(
-    input logic  [3*`NF+6:0]            f,
-    output logic [$clog2(3*`NF+7)-1:0]    NormCntE    // normalization shift
-);
-    
-    logic [$clog2(3*`NF+7)-1:0] i;
-    always_comb begin
-        i = 0;
-        while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1;  // search for leading one
-        NormCntE = i;
-    end
-endmodule



@ -450,7 +389,7 @@ module fma2(
    input logic     [3*`NF+5:0]             SumM,       // the positive sum
    input logic                             NegSumM,    // was the sum negitive
    input logic                             InvZM,      // do you invert Z
-    input logic                             ZOrigDenormM, // is the original precision denormalized
+    input logic                             ZDenormM, // is the original precision denormalized
    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
    input logic                             PSgnM,      // the product's sign
    input logic                             Mult,       // multiply opperation
@ -465,7 +404,7 @@ module fma2(
    logic               ResultSgn, ResultSgnTmp;  // Result sign
    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
-    logic [`NF+2:0]     NormSum;        // normalized sum
+    logic [`NF+1:0]     NormSum;        // normalized sum
    logic               NormSumSticky;  // sticky bit calulated from the normalized sum
    logic               SumZero;        // is the sum zero
    logic               ResultDenorm;   // is the result denormalized
@ -486,7 +425,7 @@ module fma2(
    ///////////////////////////////////////////////////////////////////////////////

    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, 
-            .ZOrigDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
+            .ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);



@ -533,7 +472,7 @@ module fma2(
    // Select the result
    ///////////////////////////////////////////////////////////////////////////////

-    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM,
+    resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
@ -580,9 +519,9 @@ module normalize(
    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
    input logic  [`FPSIZES/3:0]         FmtM,       // precision 1 = double 0 = single
    input logic                         KillProdM,  // is the product set to zero
-    input logic 			            ZOrigDenormM,
+    input logic 			            ZDenormM,
    input logic                         AddendStickyM,  // the sticky bit caclulated from the aligned addend
-    output logic [`NF+2:0]              NormSum,        // normalized sum
+    output logic [`NF+1:0]              NormSum,        // normalized sum
    output logic                        SumZero,        // is the sum zero
    output logic                        NormSumSticky, UfSticky,    // sticky bits
    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
@ -599,12 +538,12 @@ module normalize(
    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////
-
+    //*** insert bias-bias simplification in fcvt.sv/phone pictures
    // Determine if the sum is zero
    assign SumZero = ~(|SumM);

    // calculate the sum's exponent
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZOrigDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));

    // convert the sum's exponent into the proper precision
    if (`FPSIZES == 1) begin
@ -707,27 +646,27 @@ module normalize(
    assign LZAPlus2 = SumShifted[3*`NF+8];
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
-    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
+    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];

    // Calculate the sticky bit
    if (`FPSIZES == 1) begin
-        assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
+        assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];

    end else if (`FPSIZES == 2) begin
        // 3*NF+5 - NF1 - 2
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
-        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
+        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);

    end else if (`FPSIZES == 3) begin
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
-        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
-        (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
+        (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
+        (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));

    end else if (`FPSIZES == 4) begin        
-        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
-        (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
-        (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
-        (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | 
+        (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
+        (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
+        (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));

    end

@ -745,7 +684,7 @@ module fmaround(
    input logic  [`FPSIZES/3:0] FmtM,       // precision 1 = double 0 = single
    input logic  [2:0]          FrmM,       // rounding mode
    input logic                 UfSticky,   // sticky bit for underlow calculation
-    input logic  [`NF+2:0]      NormSum,    // normalized sum
+    input logic  [`NF+1:0]      NormSum,    // normalized sum
    input logic                 AddendStickyM,  // addend's sticky bit
    input logic                 NormSumSticky,  // normalized sum's sticky bit
    input logic                 ZZeroM,         // is Z zero
@ -799,83 +738,53 @@ module fmaround(

    if (`FPSIZES == 1) begin
        // determine the round bit and least significant bit of the result
-        assign Guard = NormSum[2];
        assign Round = NormSum[1];
-        assign LSBNormSum = NormSum[3];
+        assign LSBNormSum = NormSum[2];

        // used to determine underflow flag
-        assign UfGuard = NormSum[1];
        assign UfRound = NormSum[0];
-        assign UfLSBNormSum = NormSum[2];
-
-        // determine sticky
-        assign Sticky = UfSticky | NormSum[0];

    end else if (`FPSIZES == 2) begin
        //         \/-------------NF---------------,
-        //      |      NF1       | 3 |             |
+        //      |      NF1       | 2 |             |
        //          '-------NF1------^

        // determine the round bit and least significant bit of the result
-        assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
        assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
-        assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
+        assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];

        // used to determine underflow flag
-        assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
        assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
-        assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];

-        // determine sticky
-        assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);

    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[2];
                    Round = NormSum[1];
-                    LSBNormSum = NormSum[3];
+                    LSBNormSum = NormSum[2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[1];
                    UfRound = NormSum[0];
-                    UfLSBNormSum = NormSum[2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[0];
                end
                `FMT1: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[`NF-`NF1+2];
                    Round = NormSum[`NF-`NF1+1];
-                    LSBNormSum = NormSum[`NF-`NF1+3];
+                    LSBNormSum = NormSum[`NF-`NF1+2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[`NF-`NF1+1];
                    UfRound = NormSum[`NF-`NF1];
-                    UfLSBNormSum = NormSum[`NF-`NF1+2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[`NF-`NF1];
                end
                `FMT2: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[`NF-`NF2+2];
                    Round = NormSum[`NF-`NF2+1];
-                    LSBNormSum = NormSum[`NF-`NF2+3];
+                    LSBNormSum = NormSum[`NF-`NF2+2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[`NF-`NF2+1];
                    UfRound = NormSum[`NF-`NF2];
-                    UfLSBNormSum = NormSum[`NF-`NF2+2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[`NF-`NF2];
                end
                default: begin
-                    Guard = 1'bx;
                    Round = 1'bx;
                    LSBNormSum = 1'bx;
-                    UfGuard = 1'bx;
                    UfRound = 1'bx;
-                    UfLSBNormSum = 1'bx;
-                    Sticky = 1'bx;
                end
            endcase
        end
@ -885,56 +794,40 @@ module fmaround(
            case (FmtM)
                2'h3: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[2];
                    Round = NormSum[1];
-                    LSBNormSum = NormSum[3];
+                    LSBNormSum = NormSum[2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[1];
                    UfRound = NormSum[0];
-                    UfLSBNormSum = NormSum[2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[0];
                end
                2'h1: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[`NF-`D_NF+2];
                    Round = NormSum[`NF-`D_NF+1];
-                    LSBNormSum = NormSum[`NF-`D_NF+3];
+                    LSBNormSum = NormSum[`NF-`D_NF+2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[`NF-`D_NF+1];
                    UfRound = NormSum[`NF-`D_NF];
-                    UfLSBNormSum = NormSum[`NF-`D_NF+2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[`NF-`D_NF];
                end
                2'h0: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[`NF-`S_NF+2];
                    Round = NormSum[`NF-`S_NF+1];
-                    LSBNormSum = NormSum[`NF-`S_NF+3];
+                    LSBNormSum = NormSum[`NF-`S_NF+2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[`NF-`S_NF+1];
                    UfRound = NormSum[`NF-`S_NF];
-                    UfLSBNormSum = NormSum[`NF-`S_NF+2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[`NF-`S_NF];
                end
                2'h2: begin
                    // determine the round bit and least significant bit of the result
-                    Guard = NormSum[`NF-`H_NF+2];
                    Round = NormSum[`NF-`H_NF+1];
-                    LSBNormSum = NormSum[`NF-`H_NF+3];
+                    LSBNormSum = NormSum[`NF-`H_NF+2];
                    // used to determine underflow flag
-                    UfGuard = NormSum[`NF-`H_NF+1];
                    UfRound = NormSum[`NF-`H_NF];
-                    UfLSBNormSum = NormSum[`NF-`H_NF+2];
-                    // determine sticky
-                    Sticky = UfSticky | NormSum[`NF-`H_NF];
                end
            endcase
        end

    end
+    // used to determine underflow flag
+    assign UfLSBNormSum = Round;
+    // determine sticky
+    assign Sticky = UfSticky | UfRound;


    // Determine if a small number was supposed to be subtracted
@ -944,28 +837,28 @@ module fmaround(
    always_comb begin
        // Determine if you add 1
        case (FrmM)
-            3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even
+            3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down
-            3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up
-            3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude
+            3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
+            3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
+            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (FrmM)
-            3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even
+            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down
-            3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up
-            3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude
+            3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
+            3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
+            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
        // Determine if you subtract 1
        case (FrmM)
            3'b000: CalcMinus1 = 0;//round to nearest even
-            3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
-            3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down
-            3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up
+            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
+            3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
+            3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
            default: CalcMinus1 = 1'bx;
        endcase
@ -973,9 +866,9 @@ module fmaround(
    end

    // If an answer is exact don't round
-    assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
-    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky
-    assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
+    assign Plus1 = CalcPlus1 & (Sticky | Round);
+    assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
+    assign Minus1 = CalcMinus1 & (Sticky | Round);

    // Compute rounded result
    if (`FPSIZES == 1) begin
@ -1011,7 +904,7 @@ module fmaround(

    end

-    assign NormSumTruncated = NormSum[`NF+2:3];
+    assign NormSumTruncated = NormSum[`NF+1:2];
    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
    assign ResultExp = FullResultExp[`NE-1:0];

@ -1083,12 +976,12 @@ module fmaflags(
    // Set Underflow flag if the number is too small to be represented in normal numbers
    //      - Don't set the underflow flag if the result is exact

-    assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
+    assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
    //                      exp is negative         result is denorm        exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
-    assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
+    assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
    // Set Inexact flag if the result is different from what would be output given infinite precision
    //      - Don't set the underflow flag if an underflowed result isn't output
-    assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
+    assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);

    // Combine flags
    //      - FMA can't set the Divide by zero flag
@ -1108,7 +1001,7 @@ module resultselect(
    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                     ZOrigDenormM, // is the original precision denormalized
+    input logic                     ZDenormM, // is the original precision denormalized
    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
    input logic                     PSgnM,      // the product's sign
    input logic                     ResultSgn,  // the result's sign
@ -1134,7 +1027,7 @@ module resultselect(
        end
        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1153,7 +1046,7 @@ module resultselect(
                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1173,7 +1066,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1189,7 +1082,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
@ -1206,7 +1099,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
                                                                                                                                  {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
@ -1244,7 +1137,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
                                                                                                                                        {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
                    NormResult = {ResultSgn, ResultExp, ResultFrac};
@ -1260,7 +1153,7 @@ module resultselect(
                    end
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
@ -1277,7 +1170,7 @@ module resultselect(
                    
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
                                                                                                                                  {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
@ -1295,7 +1188,7 @@ module resultselect(
                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
                                                                                                              {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      

-                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -72,7 +72,7 @@ module fpu (
   logic [1:0] 	  FResultSelD, FResultSelE;           // Select the result written to FP register
   logic [1:0] 	  FResultSelM, FResultSelW;           // Select the result written to FP register
   logic [2:0] 	  FOpCtrlD, FOpCtrlE;       // Select which opperation to do in each component
-   logic [2:0] 	  FResSelD, FResSelE;       // Select one of the results that finish in the memory stage
+   logic [1:0] 	  FResSelD, FResSelE;       // Select one of the results that finish in the memory stage
   logic [1:0] 	  FIntResSelD, FIntResSelE;           // Select the result written to the integer resister
   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input

@ -104,7 +104,6 @@ module fpu (
   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
   logic 		  XNormE;                             // is normal
-   logic         ZOrigDenormE;
   logic 		  FmtQ;
   logic 		  FOpCtrlQ;     

@ -114,9 +113,8 @@ module fpu (
   logic [63:0] 	  FMAResM, FMAResW;                   // FMA/multiply result
   logic [4:0] 	  FMAFlgM;                   // FMA/multiply result	
   logic [63:0] 	  ReadResW;                           // read result (load instruction)
-   logic [63:0] 	  CvtFpResE;    // add/FP -> FP convert result
-   logic [4:0] 	  CvtFpFlgE;    // add/FP -> FP convert flags
   logic [63:0] 	  CvtResE;                   // FP <-> int convert result
+   logic [`XLEN-1:0] CvtIntResE;                   // FP <-> int convert result
   logic [4:0] 	  CvtFlgE;                   // FP <-> int convert flags //*** trim this	
   logic [63:0] 	  ClassResE;               // classify result
   logic [63:0] 	  CmpResE;                   // compare result
@ -152,7 +150,7 @@ module fpu (
   flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+   flopenrc #(16) DECtrlReg3(clk, reset, FlushE, ~StallE, 
               {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
               {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});

@ -177,7 +175,7 @@ module fpu (
   // unpack unit
   //    - splits FP inputs into their various parts
   //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
-   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE,
+   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
@ -189,11 +187,11 @@ module fpu (
   //   - handles FMA and multiply instructions
   fma fma (.clk, .reset, .FlushM, .StallM, 
      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-      .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
+      .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
      .XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, 
      .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-      .FOpCtrlE, .ZOrigDenormE,
+      .FOpCtrlE,
      .FmtE, .FmtM, .FrmM, 
      .FMAFlgM, .FMAResM);

@ -214,13 +212,12 @@ module fpu (
         .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));

   // other FP execution units
-   fcvtfp fcvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
   fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE);
-   fcvtint fcvtint (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
-   .CvtResE, .CvtFlgE);
+   fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
+              .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);

   // data to be stored in memory - to IEU
   //    - FP uses NaN-blocking format
@ -231,13 +228,15 @@ module fpu (
   mux2  #(64)  SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);

   // select a result that may be written to the FP register
-   mux5  #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE);
-   mux5  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
+   mux4  #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
+   mux4  #(5)  FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);

   // select the result that may be written to the integer register - to IEU
   mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], 
-               CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
+               // fourth data input: fcvt's XLEN-wide integer conversion result (CvtIntResE)
+               CvtIntResE, FIntResSelE, FIntResE);
   // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
+   // *** make sure the fpu matches the chapter diagram

   // E/M pipe registers

--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -12,7 +12,6 @@ module unpack (
    output logic                    XDenormE, YDenormE, ZDenormE,   // is XYZ denormalized
    output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
    output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-    output logic                    ZOrigDenormE,                   // is the original precision denormalized
    output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
 );
 
@ -30,9 +29,9 @@ module unpack (
        assign ZSgnE = Z[`FLEN-1];

        // exponent
-        assign XExpE = X[`FLEN-2:`NF]; 
-        assign YExpE = Y[`FLEN-2:`NF]; 
-        assign ZExpE = Z[`FLEN-2:`NF]; 
+        assign XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
+        assign YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
+        assign ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};

        // fraction (no assumed 1)
        assign XFracE = X[`NF-1:0];
@ -49,7 +48,11 @@ module unpack (
        assign YExpMaxE = &YExpE;
        assign ZExpMaxE = &ZExpE;

-        assign ZOrigDenormE = 1'b0;
+
+        // is the input (in it's original format) denormalized
+        assign XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero; 
+        assign YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero; 
+        assign ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero; 
    

    end else if (`FPSIZES == 2) begin   // if there are 2 floating point formats supported
@ -73,7 +76,6 @@ module unpack (
        //      double and half

        logic  [`LEN1-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
-        logic               XOrigDenormE, YOrigDenormE;   // the original value of XYZ is denormalized

        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
@ -95,14 +97,16 @@ module unpack (

        // extract the exponent, converting the smaller exponent into the larger precision if nessisary
        //      - if the original precision had a denormal number convert the exponent value 1
-        assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
-        assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
-        assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 
+        assign XExpE = FmtE ? {X[`FLEN-2:`NF+1], X[`NF]|XDenormE} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE}; 
+        assign YExpE = FmtE ? {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE}; 
+        assign ZExpE = FmtE ? {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE}; 

        // is the input (in it's original format) denormalized
-        assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; 
-        assign YOrigDenormE = FmtE ? 0 : ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; 
-        assign ZOrigDenormE = FmtE ? 0 : ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; 
+
+                    // is the input (in it's original format) denormalized
+        assign XDenormE = (FmtE ? ~|X[`FLEN-2:`NF] : ~|XLen1[`LEN1-2:`NF1]) & ~XFracZero; 
+        assign YDenormE = (FmtE ? ~|Y[`FLEN-2:`NF] : ~|YLen1[`LEN1-2:`NF1]) & ~YFracZero; 
+        assign ZDenormE = (FmtE ? ~|Z[`FLEN-2:`NF] : ~|ZLen1[`LEN1-2:`NF1]) & ~ZFracZero; 

        // extract the fraction, add trailing zeroes to the mantissa if nessisary
        assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
@ -141,7 +145,6 @@ module unpack (

        logic  [`LEN1-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
        logic  [`LEN2-1:0]  XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
-        logic               XOrigDenormE, YOrigDenormE;   // the original value of XYZ is denormalized
        
        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
@ -156,14 +159,15 @@ module unpack (
        // There are 2 case statements
        //      - one for other singals and one for sgn/exp/frac
        //      - need two for the dependencies in the expoenent calculation
+        //*** pull out the ~FracZero and AND it in at the end
        always_comb begin
            case (FmtE)
                `FMT: begin // if input is largest precision (`FLEN - ie quad or double)

-                    // This is the original format so set OrigDenorm to 0
+                    // is the input (in its original format) denormalized
-                    XOrigDenormE = 1'b0; 
-                    YOrigDenormE = 1'b0; 
-                    ZOrigDenormE = 1'b0; 
+                    XDenormE = ~|X[`FLEN-2:`NF] & ~XFracZero; 
+                    YDenormE = ~|Y[`FLEN-2:`NF] & ~YFracZero; 
+                    ZDenormE = ~|Z[`FLEN-2:`NF] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |X[`FLEN-2:`NF]; 
@ -178,9 +182,9 @@ module unpack (
                `FMT1: begin    // if input is larger precsion (`LEN1 - double or single)

                    // is the input (in it's original format) denormalized
-                    XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; 
-                    YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; 
-                    ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; 
+                    XDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; 
+                    YDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; 
+                    ZDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |XLen1[`LEN1-2:`NF1]; 
@ -195,9 +199,9 @@ module unpack (
                `FMT2: begin        // if input is smallest precsion (`LEN2 - single or half)

                    // is the input (in it's original format) denormalized
-                    XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero; 
-                    YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero; 
-                    ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero; 
+                    XDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero; 
+                    YDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero; 
+                    ZDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |XLen2[`LEN2-2:`NF2]; 
@ -210,9 +214,9 @@ module unpack (
                    ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
                end
                default: begin
-                    XOrigDenormE = 0; 
-                    YOrigDenormE = 0; 
-                    ZOrigDenormE = 0; 
+                    XDenormE = 0; 
+                    YDenormE = 0; 
+                    ZDenormE = 0; 
                    XExpNonzero = 0; 
                    YExpNonzero = 0;
                    ZExpNonzero = 0;
@ -231,9 +235,9 @@ module unpack (
                    ZSgnE = Z[`FLEN-1];

                    // extract the exponent
-                    XExpE = X[`FLEN-2:`NF]; 
-                    YExpE = Y[`FLEN-2:`NF]; 
-                    ZExpE = Z[`FLEN-2:`NF]; 
+                    XExpE = {X[`FLEN-2:`NF+1], X[`NF]|XDenormE};
+                    YExpE = {Y[`FLEN-2:`NF+1], Y[`NF]|YDenormE};
+                    ZExpE = {Z[`FLEN-2:`NF+1], Z[`NF]|ZDenormE};

                    // extract the fraction
                    XFracE = X[`NF-1:0];
@ -256,9 +260,9 @@ module unpack (
                    // also need to take into account possible zero/denorm/inf/NaN values

                    // convert the larger precision's exponent to use the largest precision's bias
-                    XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
-                    YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
-                    ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 
+                    XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1+1], XLen1[`NF1]|XDenormE}; 
+                    YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1+1], YLen1[`NF1]|YDenormE}; 
+                    ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1+1], ZLen1[`NF1]|ZDenormE}; 

                    // extract the fraction and add the nessesary trailing zeros
                    XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
@ -281,9 +285,9 @@ module unpack (
                    // also need to take into account possible zero/denorm/inf/NaN values
                    
                    // convert the smallest precision's exponent to use the largest precision's bias
-                    XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; 
-                    YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; 
-                    ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; 
+                    XExpE = XDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]}; 
+                    YExpE = YDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]}; 
+                    ZExpE = ZDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]}; 

                    // extract the fraction and add the nessesary trailing zeros
                    XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
@ -318,7 +322,6 @@ module unpack (
        logic  [`D_LEN-1:0]  XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
        logic  [`S_LEN-1:0]  XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
        logic  [`H_LEN-1:0]  XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
-        logic                XOrigDenormE, YOrigDenormE;   // the original value of XYZ is denormalized
        
        // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
        assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
@ -343,10 +346,10 @@ module unpack (
            case (FmtE)
                2'b11: begin  // if input is quad percision

-                    // This is the original format so set OrigDenorm to 0
-                    XOrigDenormE = 1'b0; 
-                    YOrigDenormE = 1'b0; 
-                    ZOrigDenormE = 1'b0; 
+                    // is the input (in it's original format) denormalized
+                    XDenormE = ~|X[`Q_LEN-2:`Q_NF] & ~XFracZero; 
+                    YDenormE = ~|Y[`Q_LEN-2:`Q_NF] & ~YFracZero; 
+                    ZDenormE = ~|Z[`Q_LEN-2:`Q_NF] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |X[`Q_LEN-2:`Q_NF]; 
@ -366,9 +369,9 @@ module unpack (
                    ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];

                    // is the input (in it's original format) denormalized
-                    XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero; 
-                    YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero; 
-                    ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero; 
+                    XDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero; 
+                    YDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero; 
+                    ZDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; 
@ -383,9 +386,9 @@ module unpack (
                    ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];

                    // is the input (in it's original format) denormalized
-                    XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero; 
-                    YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero; 
-                    ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero; 
+                    XDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero; 
+                    YDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero; 
+                    ZDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; 
@ -400,9 +403,9 @@ module unpack (
                    ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];

                    // is the input (in it's original format) denormalized
-                    XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero; 
-                    YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero; 
-                    ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero; 
+                    XDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero; 
+                    YDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero; 
+                    ZDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero; 

                    // is the exponent non-zero
                    XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; 
@ -421,9 +424,9 @@ module unpack (
                    ZSgnE = Z[`Q_LEN-1];

                    // extract the exponent
-                    XExpE = X[`Q_LEN-2:`Q_NF]; 
-                    YExpE = Y[`Q_LEN-2:`Q_NF]; 
-                    ZExpE = Z[`Q_LEN-2:`Q_NF]; 
+                    XExpE = {X[`Q_LEN-2:`Q_NF+1], X[`Q_NF]|XDenormE};
+                    YExpE = {Y[`Q_LEN-2:`Q_NF+1], Y[`Q_NF]|YDenormE};
+                    ZExpE = {Z[`Q_LEN-2:`Q_NF+1], Z[`Q_NF]|ZDenormE};

                    // extract the fraction
                    XFracE = X[`Q_NF-1:0];
@ -446,9 +449,9 @@ module unpack (
                    
                    // convert the double precsion exponent into quad precsion

-                    XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; 
-                    YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; 
-                    ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; 
+                    XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF+1], XLen1[`D_NF]|XDenormE}; 
+                    YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF+1], YLen1[`D_NF]|YDenormE}; 
+                    ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF+1], ZLen1[`D_NF]|ZDenormE}; 

                    // extract the fraction and add the nessesary trailing zeros
                    XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
@ -470,9 +473,9 @@ module unpack (
                    // also need to take into account possible zero/denorm/inf/NaN values
                    
                    // convert the single precsion exponent into quad precsion
-                    XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; 
-                    YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; 
-                    ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; 
+                    XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF+1], XLen2[`S_NF]|XDenormE}; 
+                    YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF+1], YLen2[`S_NF]|YDenormE}; 
+                    ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF+1], ZLen2[`S_NF]|ZDenormE}; 

                    // extract the fraction and add the nessesary trailing zeros
                    XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@ -494,9 +497,9 @@ module unpack (
                    // also need to take into account possible zero/denorm/inf/NaN values

                    // convert the half precsion exponent into quad precsion
-                    XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; 
-                    YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; 
-                    ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; 
+                    XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF+1], XLen3[`H_NF]|XDenormE}; 
+                    YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF+1], YLen3[`H_NF]|YDenormE}; 
+                    ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF+1], ZLen3[`H_NF]|ZDenormE}; 

                    // extract the fraction and add the nessesary trailing zeros
                    XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
@ -537,10 +540,10 @@ module unpack (
    assign YSNaNE = YNaNE&~YFracE[`NF-1];
    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];

-    // is the input denormalized
-    assign XDenormE = XExpZero & ~XFracZero;
-    assign YDenormE = YExpZero & ~YFracZero;
-    assign ZDenormE = ZExpZero & ~ZFracZero;
+    // // is the input denormalized
+    // assign XDenormE = XExpZero & ~XFracZero;
+    // assign YDenormE = YExpZero & ~YFracZero;
+    // assign ZDenormE = ZExpZero & ~ZFracZero;

    // is the input infinity
    assign XInfE = XExpMaxE & XFracZero;
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@ -0,0 +1,15 @@
+// Leading zero counter, i.e. a priority encoder.
+//   num     - vector to scan, starting at the most significant bit
+//   ZeroCnt - number of 0 bits above the most significant 1 bit of num
+//
+// ZeroCnt is only $clog2(WIDTH) bits wide, so when num is all zeros the
+// count (WIDTH) is truncated to that width; for a power-of-two WIDTH it
+// therefore reads back as 0.
+module lzc #(parameter WIDTH=1) (
+    input logic  [WIDTH-1:0]            num,
+    output logic [$clog2(WIDTH)-1:0]  ZeroCnt
+);
+
+    // The scan index is a plain int instead of a $clog2(WIDTH)-bit vector:
+    // a narrow index wraps back to 0 after WIDTH-1 when WIDTH is a power of
+    // two, so an all-zero num would keep the search loop running forever.
+    int i;
+    always_comb begin
+        i = 0;
+        // bound check comes first so num is never indexed out of range
+        while ((i < WIDTH) && ~num[WIDTH-1-i]) i = i+1;  // search for leading one
+        ZeroCnt = ($clog2(WIDTH))'(i);
+    end
+endmodule
--- a/pipelined/src/ppa/ppa.sv
+++ b/pipelined/src/ppa/ppa.sv
@ -126,6 +126,16 @@ module ppa_mult_128 #(parameter WIDTH=128) (
  assign y = a * b;
 endmodule

+// Wrapper that instantiates ppa_alu with WIDTH defaulting to 8.
+// The inner instance is hooked up with .*, so each ppa_alu port is
+// connected to the wrapper signal of the same name.
+module ppa_alu_8 #(parameter WIDTH=8) (
+  input  logic [WIDTH-1:0] A, B,
+  input  logic [2:0]       ALUControl,
+  input  logic [2:0]       Funct3,
+  output logic [WIDTH-1:0] Result,
+  output logic [WIDTH-1:0] Sum);
+
+  ppa_alu #(WIDTH) alu (.*);
+endmodule
+
 module ppa_alu_16 #(parameter WIDTH=16) (
  input  logic [WIDTH-1:0] A, B,
  input  logic [2:0]       ALUControl,
@ -133,7 +143,7 @@ module ppa_alu_16 #(parameter WIDTH=16) (
  output logic [WIDTH-1:0] Result,
  output logic [WIDTH-1:0] Sum);

-  ppa_alu #(WIDTH) alu_16 (.*);
+  ppa_alu #(WIDTH) alu (.*);
 endmodule

 module ppa_alu_32 #(parameter WIDTH=32) (
@ -143,7 +153,7 @@ module ppa_alu_32 #(parameter WIDTH=32) (
  output logic [WIDTH-1:0] Result,
  output logic [WIDTH-1:0] Sum);

-  ppa_alu #(WIDTH) alu_32 (.*);
+  ppa_alu #(WIDTH) alu (.*);
 endmodule

 module ppa_alu_64 #(parameter WIDTH=64) (
@ -153,7 +163,17 @@ module ppa_alu_64 #(parameter WIDTH=64) (
  output logic [WIDTH-1:0] Result,
  output logic [WIDTH-1:0] Sum);

-  ppa_alu #(WIDTH) alu_64 (.*);
+  ppa_alu #(WIDTH) alu (.*);
+endmodule
+
+// Wrapper that instantiates ppa_alu with WIDTH defaulting to 128.
+// The inner instance is hooked up with .*, so each ppa_alu port is
+// connected to the wrapper signal of the same name.
+module ppa_alu_128 #(parameter WIDTH=128) (
+  input  logic [WIDTH-1:0] A, B,
+  input  logic [2:0]       ALUControl,
+  input  logic [2:0]       Funct3,
+  output logic [WIDTH-1:0] Result,
+  output logic [WIDTH-1:0] Sum);
+
+  ppa_alu #(WIDTH) alu (.*);
 endmodule

 module ppa_alu #(parameter WIDTH=32) (
@ -209,9 +229,11 @@ module ppa_alu #(parameter WIDTH=32) (
      3'b111: FullResult = A & B;     // and
    endcase

-  // support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
-  if (WIDTH==64)  assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
-  else            assign Result = FullResult;
+  assign Result = FullResult;
+  // not using W64 so it has the same architecture regardless of width
+  // // support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits
+  // if (WIDTH==64)  assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
+  // else            assign Result = FullResult;
 endmodule

 module ppa_shiftleft_8 #(parameter WIDTH=8) (
@ -254,51 +276,6 @@ module ppa_shiftleft_128 #(parameter WIDTH=128) (
  assign y = a << amt;
 endmodule

-module ppa_shifter_8 #(parameter WIDTH=8) (
-  input  logic [WIDTH-1:0]     A,
-  input  logic [$clog2(WIDTH)-1:0] Amt,
-  input  logic                 Right, Arith, W64,
-  output logic [WIDTH-1:0]     Y);
-
-  ppa_shifter #(WIDTH) sh (.*);
-endmodule
-
-module ppa_shifter_16 #(parameter WIDTH=16) (
-  input  logic [WIDTH-1:0]     A,
-  input  logic [$clog2(WIDTH)-1:0] Amt,
-  input  logic                 Right, Arith, W64,
-  output logic [WIDTH-1:0]     Y);
-
-  ppa_shifter #(WIDTH) sh (.*);
-endmodule
-
-module ppa_shifter_32 #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0]     A,
-  input  logic [$clog2(WIDTH)-1:0] Amt,
-  input  logic                 Right, Arith, W64,
-  output logic [WIDTH-1:0]     Y);
-
-  ppa_shifter #(WIDTH) sh (.*);
-endmodule
-
-module ppa_shifter_64 #(parameter WIDTH=64) (
-  input  logic [WIDTH-1:0]     A,
-  input  logic [$clog2(WIDTH)-1:0] Amt,
-  input  logic                 Right, Arith, W64,
-  output logic [WIDTH-1:0]     Y);
-
-  ppa_shifter #(WIDTH) sh (.*);
-endmodule
-
-module ppa_shifter_128 #(parameter WIDTH=128) (
-  input  logic [WIDTH-1:0]     A,
-  input  logic [$clog2(WIDTH)-1:0] Amt,
-  input  logic                 Right, Arith, W64,
-  output logic [WIDTH-1:0]     Y);
-
-  ppa_shifter #(WIDTH) sh (.*);
-endmodule
-
 module ppa_shifter #(parameter WIDTH=32) (
  input  logic [WIDTH-1:0]     A,
  input  logic [$clog2(WIDTH)-1:0] Amt,
@ -313,29 +290,35 @@ module ppa_shifter #(parameter WIDTH=32) (
  // For RV64, 32 and 64-bit shifts are needed, with sign extension.

  // funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong)
-  if (WIDTH == 64 | WIDTH ==128) begin:shifter  // RV64 or 128
-    always_comb  // funnel mux
-      if (W64) begin // 32-bit shifts
-        if (Right)
-          if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
-          else       z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
-        else         z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
-      end else begin
-        if (Right)
-          if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
-          else       z = {{WIDTH-1{1'b0}}, A};
-        else         z = {A, {WIDTH-1{1'b0}}};         
-      end
-      assign amttrunc = W64  ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift 
-  end else begin:shifter // RV32 or less
-    always_comb  // funnel mux
+  // if (WIDTH == 64 | WIDTH ==128) begin:shifter  // RV64 or 128
+  //   always_comb  // funnel mux
+  //     if (W64) begin // 32-bit shifts
+  //       if (Right)
+  //         if (Arith) z = {{WIDTH{1'b0}}, {WIDTH/2 -1{A[WIDTH/2 -1]}}, A[WIDTH/2 -1:0]};
+  //         else       z = {{WIDTH*3/2-1{1'b0}}, A[WIDTH/2 -1:0]};
+  //       else         z = {{WIDTH/2{1'b0}}, A[WIDTH/2 -1:0], {WIDTH-1{1'b0}}};
+  //     end else begin
+  //       if (Right)
+  //         if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
+  //         else       z = {{WIDTH-1{1'b0}}, A};
+  //       else         z = {A, {WIDTH-1{1'b0}}};         
+  //     end
+  //     assign amttrunc = W64  ? {1'b0, Amt[$clog2(WIDTH)-2:0]} : Amt; // 32 or 64-bit shift 
+  // end else begin:shifter // RV32 or less
+  //   always_comb  // funnel mux
+  //     if (Right) 
+  //       if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
+  //       else       z = {{WIDTH-1{1'b0}}, A};
+  //     else         z = {A, {WIDTH-1{1'b0}}};
+  //   assign amttrunc = Amt; // shift amount
+  // end 
+    
+  always_comb  // funnel mux
      if (Right) 
        if (Arith) z = {{WIDTH-1{A[WIDTH-1]}}, A};
        else       z = {{WIDTH-1{1'b0}}, A};
      else         z = {A, {WIDTH-1{1'b0}}};
    assign amttrunc = Amt; // shift amount
-  end 
-    

  // opposite offset for right shifts
  assign offset = Right ? amttrunc : ~amttrunc;
@ -345,7 +328,51 @@ module ppa_shifter #(parameter WIDTH=32) (
  assign Y = zshift[WIDTH-1:0];    
 endmodule

-// just report one hot
+  //   module ppa_shifter_8 #(parameter WIDTH=8) (
+  //   input  logic [WIDTH-1:0]     A,
+  //   input  logic [$clog2(WIDTH)-1:0] Amt,
+  //   input  logic                 Right, Arith, W64,
+  //   output logic [WIDTH-1:0]     Y);
+
+  //   ppa_shifter #(WIDTH) sh (.*);
+  // endmodule
+
+  // module ppa_shifter_16 #(parameter WIDTH=16) (
+  //   input  logic [WIDTH-1:0]     A,
+  //   input  logic [$clog2(WIDTH)-1:0] Amt,
+  //   input  logic                 Right, Arith, W64,
+  //   output logic [WIDTH-1:0]     Y);
+
+  //   ppa_shifter #(WIDTH) sh (.*);
+  // endmodule
+
+  // module ppa_shifter_32 #(parameter WIDTH=32) (
+  //   input  logic [WIDTH-1:0]     A,
+  //   input  logic [$clog2(WIDTH)-1:0] Amt,
+  //   input  logic                 Right, Arith, W64,
+  //   output logic [WIDTH-1:0]     Y);
+
+  //   ppa_shifter #(WIDTH) sh (.*);
+  // endmodule
+
+  // module ppa_shifter_64 #(parameter WIDTH=64) (
+  //   input  logic [WIDTH-1:0]     A,
+  //   input  logic [$clog2(WIDTH)-1:0] Amt,
+  //   input  logic                 Right, Arith, W64,
+  //   output logic [WIDTH-1:0]     Y);
+
+  //   ppa_shifter #(WIDTH) sh (.*);
+  // endmodule
+
+  // module ppa_shifter_128 #(parameter WIDTH=128) (
+  //   input  logic [WIDTH-1:0]     A,
+  //   input  logic [$clog2(WIDTH)-1:0] Amt,
+  //   input  logic                 Right, Arith, W64,
+  //   output logic [WIDTH-1:0]     Y);
+
+  //   ppa_shifter #(WIDTH) sh (.*);
+  // endmodule
+  
 module ppa_prioritythermometer #(parameter N = 8) (
  input  logic  [N-1:0] a,
  output logic  [N-1:0] y);
@ -538,7 +565,7 @@ endmodule

 // *** some way to express data-critical inputs

-module ppa_flop_8 #(parameter WIDTH = 8) ( 
+module ppa_flop #(parameter WIDTH = 8) ( 
  input  logic             clk,
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);
@ -547,13 +574,26 @@ module ppa_flop_8 #(parameter WIDTH = 8) (
    q <= #1 d;
 endmodule

+// Two ppa_flop registers in series, WIDTH defaulting to 8:
+// d is captured by the first stage and reaches q through the second.
+module ppa_flop_8 #(parameter WIDTH = 8) (
+  input  logic             clk,
+  input  logic [WIDTH-1:0] d,
+  output logic [WIDTH-1:0] q);
+
+  // output of the first stage, input of the second
+  logic [WIDTH-1:0] qmid;
+
+  ppa_flop #(.WIDTH(WIDTH)) f1(.clk(clk), .d(d),    .q(qmid));
+  ppa_flop #(.WIDTH(WIDTH)) f2(.clk(clk), .d(qmid), .q(q));
+endmodule
+
 module ppa_flop_16 #(parameter WIDTH = 16) ( 
  input  logic             clk,
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flop #(WIDTH) f1(clk, d, q1);
+  ppa_flop #(WIDTH) f2(clk, q1, q);
 endmodule

 module ppa_flop_32 #(parameter WIDTH = 32) ( 
@ -561,8 +601,10 @@ module ppa_flop_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flop #(WIDTH) f1(clk, d, q1);
+  ppa_flop #(WIDTH) f2(clk, q1, q);
 endmodule

 module ppa_flop_64 #(parameter WIDTH = 64) ( 
@ -570,8 +612,10 @@ module ppa_flop_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flop #(WIDTH) f1(clk, d, q1);
+  ppa_flop #(WIDTH) f2(clk, q1, q);
 endmodule

 module ppa_flop_128 #(parameter WIDTH = 128) ( 
@ -579,8 +623,20 @@ module ppa_flop_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

+  logic [WIDTH-1:0] q1;
+
+  ppa_flop #(WIDTH) f1(clk, d, q1);
+  ppa_flop #(WIDTH) f2(clk, q1, q);
+endmodule
+
+module ppa_flopr #(parameter WIDTH = 8) ( 
+  input  logic             clk, reset,
+  input  logic [WIDTH-1:0] d, 
+  output logic [WIDTH-1:0] q);
+
  always_ff @(posedge clk)
-    q <= #1 d;
+    if (reset) q <= #1 0;
+    else       q <= #1 d;
 endmodule

 module ppa_flopr_8 #(parameter WIDTH = 8) ( 
@ -588,9 +644,10 @@ module ppa_flopr_8 #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
+  ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_flopr_16 #(parameter WIDTH = 16) ( 
@ -598,9 +655,10 @@ module ppa_flopr_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
+  ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_flopr_32 #(parameter WIDTH = 32) ( 
@ -608,9 +666,10 @@ module ppa_flopr_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
+  ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_flopr_64 #(parameter WIDTH = 64) ( 
@ -618,9 +677,10 @@ module ppa_flopr_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
+  ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_flopr_128 #(parameter WIDTH = 128) ( 
@ -628,7 +688,18 @@ module ppa_flopr_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopr #(WIDTH) f1(clk, reset, d, q1);
+  ppa_flopr #(WIDTH) f2(clk, reset, q1, q);
+endmodule
+
+// WIDTH-bit register with an asynchronous, active-high reset.
+//   clk   - q is updated on the rising edge
+//   reset - a rising edge (or a high level at a clock edge) clears q to 0
+//   d     - value loaded into q when reset is low
+// Both assignments carry a #1 delay.
+module ppa_floprasync #(parameter WIDTH = 8) ( 
+  input  logic             clk, reset,
+  input  logic [WIDTH-1:0] d, 
+  output logic [WIDTH-1:0] q);
+
+  // reset is in the sensitivity list, which is what makes it asynchronous
+  always_ff @(posedge clk or posedge reset)
    if (reset) q <= #1 0;
    else       q <= #1 d;
 endmodule
@ -638,9 +709,10 @@ module ppa_floprasync_8 #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk or posedge reset)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
+  ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_floprasync_16 #(parameter WIDTH = 16) ( 
@ -648,9 +720,10 @@ module ppa_floprasync_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk or posedge reset)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
+  ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_floprasync_32 #(parameter WIDTH = 32) ( 
@ -658,9 +731,10 @@ module ppa_floprasync_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk or posedge reset)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
+  ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_floprasync_64 #(parameter WIDTH = 64) ( 
@ -668,9 +742,10 @@ module ppa_floprasync_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk or posedge reset)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
+  ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
 endmodule

 module ppa_floprasync_128 #(parameter WIDTH = 128) ( 
@ -678,9 +753,20 @@ module ppa_floprasync_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk or posedge reset)
-    if (reset) q <= #1 0;
-    else       q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_floprasync #(WIDTH) f1(clk, reset, d, q1);
+  ppa_floprasync #(WIDTH) f2(clk, reset, q1, q);
+endmodule
+
+// WIDTH-bit register with synchronous reset and enable.
+//   clk   - q is updated on the rising edge only
+//   reset - when high at a clock edge, q is cleared to 0 (takes priority over en)
+//   en    - when high (and reset low) at a clock edge, q loads d; otherwise q holds
+// Both assignments carry a #1 delay.
+module ppa_flopenr #(parameter WIDTH = 8) (
+  input  logic             clk, reset, en,
+  input  logic [WIDTH-1:0] d, 
+  output logic [WIDTH-1:0] q);
+
+  always_ff @(posedge clk)
+    if (reset)   q <= #1 0;
+    else if (en) q <= #1 d;
 endmodule

 module ppa_flopenr_8 #(parameter WIDTH = 8) (
@ -688,9 +774,10 @@ module ppa_flopenr_8 #(parameter WIDTH = 8) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset)   q <= #1 0;
-    else if (en) q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
+  ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
 endmodule

 module ppa_flopenr_16 #(parameter WIDTH = 16) (
@ -698,9 +785,10 @@ module ppa_flopenr_16 #(parameter WIDTH = 16) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset)   q <= #1 0;
-    else if (en) q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
+  ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
 endmodule

 module ppa_flopenr_32 #(parameter WIDTH = 32) (
@ -708,9 +796,10 @@ module ppa_flopenr_32 #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset)   q <= #1 0;
-    else if (en) q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
+  ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
 endmodule

 module ppa_flopenr_64 #(parameter WIDTH = 64) (
@ -718,9 +807,10 @@ module ppa_flopenr_64 #(parameter WIDTH = 64) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset)   q <= #1 0;
-    else if (en) q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
+  ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
 endmodule

 module ppa_flopenr_128 #(parameter WIDTH = 128) (
@ -728,9 +818,10 @@ module ppa_flopenr_128 #(parameter WIDTH = 128) (
  input  logic [WIDTH-1:0] d, 
  output logic [WIDTH-1:0] q);

-  always_ff @(posedge clk)
-    if (reset)   q <= #1 0;
-    else if (en) q <= #1 d;
+  logic [WIDTH-1:0] q1;
+
+  ppa_flopenr #(WIDTH) f1(clk, reset, en, d, q1);
+  ppa_flopenr #(WIDTH) f2(clk, reset, en, q1, q);
 endmodule

 module ppa_csa_8 #(parameter WIDTH = 8) (
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@ -8,3 +8,9 @@ testgen: testgen.c

 qst2: qst2.c
 	gcc qst2.c -lm -o qst2
+	gcc -lm -o testgen testgen.c
+	./testgen
+
+# Compile exptestgen.c and then run the resulting program.
+# -lm is placed after the source file: the linker resolves libraries in
+# command-line order, so a library listed before the code that uses it can
+# be dropped and leave the math symbols undefined.
+exptestgen: exptestgen.c
+	gcc -o exptestgen exptestgen.c -lm
+	./exptestgen
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@ -21,7 +21,7 @@

 /* Prototypes */

-void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa);
+void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac);
 void printhex(FILE *fptr, double x);
 double random_input(void);
 double random_input_e(void);
@ -31,12 +31,13 @@ double random_input_e(void);
 void main(void)
 {
  FILE *fptr;
-  // e1 & e2 are exponents
-  // a & b are mantissas
-  // r_mantissa is result of mantissa divsion
-  // r_exp is result of exponent division
-  double a, b, r_mantissa, r_exp;
-  int e1, e2;
+  // aExp & bExp are exponents
+  // aFrac & bFrac are mantissas
+  // rFrac is result of fractional division
+  // rExp is result of exponent division
+  double aFrac, bFrac, rFrac;
+  int    aExp,  bExp,  rExp;
+  int    aSign, bSign, rSign;
  double mantissa[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
 			  1.1, 1.2, 1.01, 1.001, 1.0001,
@ -51,25 +52,28 @@ void main(void)
  }

  for (i=0; i<ENTRIES; i++) {
-    b = mantissa[i];
-    e2 = exponent[i] + bias;
+    bFrac = mantissa[i];
+    bExp = exponent[i] + bias;
+    bSign = i%2;
    for (j=0; j<ENTRIES; j++) {
-      a = mantissa[j];
-      e1 = exponent[j] + bias;
-      r_mantissa = a/b;
-      r_exp = e1 - e2 + bias;
-      output(fptr, e1, a, e2, b, r_exp, r_mantissa);
+      aFrac = mantissa[j];
+      aExp = exponent[j] + bias;
+      aSign = j%2;
+      rFrac = aFrac/bFrac;
+      rExp = aExp - bExp + bias;
+      rSign = (i+j)%2;
+      output(fptr, aSign, aExp, aFrac, bSign, bExp, bFrac, rSign, rExp, rFrac);
    }
  }
  
  // for (i = 0; i< RANDOM_VECS; i++) {
-  //   a = random_input();
-  //   b = random_input();
-  //   e1 = random_input_e() + BIAS; // make new random input function for exponents
-  //   e2 = random_input_e() + BIAS;
-  //   r_mantissa = a/b;
-  //   r_exp = e1 - e2 + BIAS;
-  //   output(fptr, e1, a, e2, b, r_exp, r_mantissa);
+  //   aFrac = random_input();
+  //   bFrac = random_input();
+  //   aExp = random_input_e() + BIAS; // make new random input function for exponents
+  //   bExp = random_input_e() + BIAS;
+  //   rFrac = aFrac/bFrac;
+  //   rExp = aExp - bExp + BIAS;
+  //   output(fptr, aExp, aFrac, bExp, bFrac, rExp, rFrac);
  // }

  fclose(fptr);
@ -77,19 +81,21 @@ void main(void)

 /* Functions */

-void output(FILE *fptr, int e1, double a, int e2, double b, int r_exp, double r_mantissa)
+void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, double bFrac, int rSign, int rExp, double rFrac)
 {
-  fprintf(fptr, "%03x", e1);
-  //printhex(fptr, e1, exp);
-  printhex(fptr, a);
+  // Print a in standard double format
+  fprintf(fptr, "%03x", aExp|(aSign<<11));
+  printhex(fptr, aFrac);
  fprintf(fptr, "_");
-  fprintf(fptr, "%03x", e2);
-  //printhex(fptr, e2, exp);
-  printhex(fptr, b);
+
+  // Print b in standard double format
+  fprintf(fptr, "%03x", bExp|(bSign<<11));
+  printhex(fptr, bFrac);
  fprintf(fptr, "_");
-  fprintf(fptr, "%03x", r_exp);
-  //printhex(fptr, r_exp, exp);
-  printhex(fptr, r_mantissa);
+
+  // Print r in standard double format
+  fprintf(fptr, "%03x", rExp|(rSign<<11));
+  printhex(fptr, rFrac);
  fprintf(fptr, "\n");
 }

--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@ -37,6 +37,8 @@ module srt #(parameter Nf=52) (
  input  logic Flush, // *** multiple pipe stages
  // Floating Point Inputs
  // later add exponents, signs, special cases
+  input  logic       XSign, YSign,
+  input  logic [`NE-1:0] XExp, YExp,
  input  logic [Nf-1:0] SrcXFrac, SrcYFrac,
  input  logic [`XLEN-1:0] SrcA, SrcB,
  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
@ -44,14 +46,18 @@ module srt #(parameter Nf=52) (
  input  logic       Signed, // Interpret integers as signed 2's complement
  input  logic       Int, // Choose integer inputss
  input  logic       Sqrt, // perform square root, not divide
-  output logic [Nf-1:0] Quot, Rem, // *** later handle integers
+  output logic       rsign,
+  output logic [Nf-1:0] Quot, Rem, QuotOTFC, // *** later handle integers
+  output logic [`NE-1:0] rExp,
  output logic [3:0] Flags
 );

  logic          qp, qz, qm; // quotient is +1, 0, or -1
-  logic [Nf-1:0] X, Dpreproc;
-  logic [Nf+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
-  logic [Nf+2:0] rp, rm;
+  logic [`NE-1:0] calcExp;
+  logic           calcSign;
+  logic [Nf-1:0]  X, Dpreproc;
+  logic [Nf+3:0]  WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
+  logic [Nf+2:0]  rp, rm;
 
  srtpreproc #(Nf) preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc);

@ -70,6 +76,8 @@ module srt #(parameter Nf=52) (
  // Accumulate quotient digits in a shift register
  qsel #(Nf) qsel(WS[55:52], WC[55:52], qp, qz, qm);
  qacc #(Nf+3) qacc(clk, Start, qp, qz, qm, rp, rm);
+  flopen #(`NE) expflop(clk, Start, calcExp, rExp);
+  flopen #(1) signflop(clk, Start, calcSign, rsign);

  // Divisor Selection logic
  inv dinv(D, Db);
@ -77,6 +85,12 @@ module srt #(parameter Nf=52) (

  // Partial Product Generation
  csa csa(WS, WC, Dsel, qp, WSA, WCA);
+  
+  otfc2 otfc2(clk, Start, qp, qz, qm, QuotOTFC);
+
+  expcalc expcalc(.XExp, .YExp, .calcExp);
+
+  signcalc signcalc(.XSign, .YSign, .calcSign);

  srtpostproc postproc(rp, rm, Quot);
 endmodule
@ -198,9 +212,57 @@ module qacc #(parameter N=55) (
    end */
 endmodule

+//////////
+// otfc //
+//////////
+
+// Radix-2 on-the-fly converter: builds the quotient from signed digits
+// (+1 / 0 / -1) as they arrive, without a carry-propagate subtraction.
+//   clk        - clock for the Q and QM registers
+//   Start      - passed as the second positional port of both flopr registers
+//                (presumably their reset - confirm against flopr)
+//   qp, qz, qm - quotient digit is +1, 0 or -1; qm is not read directly, the
+//                final else branch covers it
+//   r          - N-bit result selected from Q
+module otfc2 #(parameter N=52) (
+  input  logic         clk,
+  input  logic         Start,
+  input  logic         qp, qz, qm,
+  output logic [N-1:0] r
+);
+
+  // The on-the-fly converter transfers the quotient 
+  //  bits to the quotient as they come. 
+  //
+  // This code follows the pseudocode presented in the 
+  //  floating point chapter of the book. Right now, 
+  //  it is written for Radix-2 division.
+  //
+  // QM is Q-1. It allows us to write negative bits 
+  //  without using a costly CPA. 
+  logic [N+2:0] Q, QM, QNext, QMNext;
+  // QR and QMR are the shifted versions of Q and QM:
+  //  they drop the most significant bit so that one new
+  //  digit can be appended at the bottom.
+  logic [N+1:0] QR, QMR;
+
+  flopr #(N+3) Qreg(clk, Start, QNext, Q);
+  flopr #(N+3) QMreg(clk, Start, QMNext, QM);
+
+  always_comb begin
+    QR  = Q[N+1:0];
+    QMR = QM[N+1:0];     // Shift Q and QM
+    if (qp) begin         // digit +1: append 1 to Q, Q-1 ends in 0
+      QNext  = {QR,  1'b1};
+      QMNext = {QR,  1'b0};
+    end else if (qz) begin // digit 0: append 0 to Q, Q-1 borrows from QM
+      QNext  = {QR,  1'b0};
+      QMNext = {QMR, 1'b1};
+    end else begin        // If qp and qz are not true, then qm is
+      QNext  = {QMR, 1'b1};
+      QMNext = {QMR, 1'b0};
+    end 
+  end
+  // Pick the N-bit window below the leading bit of Q. Indices are written
+  //  in terms of N so the select tracks the parameter; for N=52 this is
+  //  Q[54] ? Q[53:2] : Q[52:1].
+  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+
+endmodule
+
 /////////
 // inv //
 /////////
+
 module inv(input  logic [55:0] in, 
           output logic [55:0] out);

@ -247,6 +309,33 @@ module csa #(parameter N=56) (
 		    (in2[54:0] & in3[54:0]), cin};
 endmodule

+
+//////////////
+// expcalc  //
+//////////////
+
+// Exponent of a quotient: XExp - YExp, re-biased.
+//   XExp, YExp - biased exponents of the two operands, `NE bits each
+//   calcExp    - XExp - YExp + bias, truncated to `NE bits
+module expcalc(
+  input logic  [`NE-1:0] XExp, YExp,
+  output logic [`NE-1:0] calcExp
+);
+
+  // Bias is 0 followed by `NE-1 ones (2^(`NE-1) - 1). Building it from `NE
+  //  keeps it matched to the port width; for `NE = 11 it is 11'b01111111111.
+  assign calcExp = XExp - YExp + {1'b0, {(`NE-1){1'b1}}};
+
+endmodule
+
+//////////////
+// signcalc //
+//////////////
+
+// Combines two operand signs into one result sign:
+// calcSign is the XOR of XSign and YSign, i.e. set when they differ.
+module signcalc(
+  input logic  XSign, YSign,
+  output logic calcSign
+);
+
+  always_comb
+    calcSign = XSign ^ YSign;
+
+endmodule
+
 //////////////
 // finaladd //
 //////////////
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@ -38,35 +38,41 @@ endmodule
 //////////
 module testbench;
  logic         clk;
-  logic        req;
+  logic         req;
  logic         done;
-  logic [51:0] a;
-  logic [51:0] b;
-  logic  [51:0] r;
-  logic [54:0] rp, rm;   // positive quotient digits
+  logic [63:0]  a, b;
+  logic [51:0]  afrac, bfrac;
+  logic [10:0]  aExp, bExp;
+  logic         asign, bsign;
+  logic [51:0]  r, rOTFC;
+  logic [54:0]  rp, rm;   // positive quotient digits
 
  // Test parameters
  parameter MEM_SIZE = 40000;
-  parameter MEM_WIDTH = 52+52+52;
+  parameter MEM_WIDTH = 64+64+64;
 
-  `define memr  51:0
-  `define memb  103:52
-  `define mema  155:104
+  `define memr  63:0
+  `define memb  127:64
+  `define mema  191:128

  // Test logicisters
  logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
  logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
                            // bit field of an array 
-  logic    [51:0] correctr, nextr, diffn, diffp;
+  logic [63:0] correctr, nextr, diffn, diffp;
+  logic [10:0] rExp;
+  logic        rsign;
  integer testnum, errors;

  // Divider
  srt  #(52) srt(.clk, .Start(req), 
                .Stall(1'b0), .Flush(1'b0), 
-                .SrcXFrac(a), .SrcYFrac(b), 
+                .XExp(aExp), .YExp(bExp), .rExp,
+                .XSign(asign), .YSign(bsign), .rsign,
+                .SrcXFrac(afrac), .SrcYFrac(bfrac), 
                .SrcA('0), .SrcB('0), .Fmt(2'b00), 
                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), 
-                .Quot(r), .Rem(), .Flags());
+                .Quot(r), .QuotOTFC(rOTFC), .Rem(), .Flags());

  // Counter
  counter counter(clk, req, done);
@ -88,7 +94,9 @@ module testbench;
      $readmemh ("testvectors", Tests);
      Vec = Tests[testnum];
      a = Vec[`mema];
+      {asign, aExp, afrac} = a;
      b = Vec[`memb];
+      {bsign, bExp, bfrac} = b;
      nextr = Vec[`memr];
      req <= #5 1;
    end
@ -100,16 +108,23 @@ module testbench;
      if (done) 
 	begin
 	  req <= #5 1;
-    diffp = correctr - r;
-    diffn = r - correctr;
-	  if (($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
+    diffp = correctr[51:0] - r;
+    diffn = r - correctr[51:0];
+	  if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
 	    begin
 	      errors = errors+1;
-	      $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
+	      $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
 	      $display("failed\n");
 	      $stop;
 	    end
-	  if (a === 52'hxxxxxxxxxxxxx)
+    if (r !== rOTFC) // Check if OTFC works
+      begin
+        errors = errors+1;
+        $display("OTFC is %h, should be %h\n", rOTFC, r);
+        $display("failed\n");
+        // $stop;
+      end
+	  if (afrac === 52'hxxxxxxxxxxxxx)
 	    begin
 	      $display("%d Tests completed successfully", testnum);
 	      $stop;
@ -122,9 +137,11 @@ module testbench;
 	  testnum = testnum+1;
 	  Vec = Tests[testnum];
 	  $display("a = %h  b = %h",a,b);
-	  a = Vec[`mema];
-	  b = Vec[`memb];
-	  nextr = Vec[`memr];
+    a = Vec[`mema];
+    {asign, aExp, afrac} = a;
+    b = Vec[`memb];
+    {bsign, bExp, bfrac} = b;
+    nextr = Vec[`memr];
 	end
    end
 
--- a/pipelined/srt/testvectors
+++ b/pipelined/srt/testvectors
--- a/pipelined/testbench/testbench-f64.sv
+++ b/pipelined/testbench/testbench-f64.sv
@ -1,123 +0,0 @@
-// testbench
-module testbench ();
-
-   logic [63:0] op1;		
-   logic [63:0] op2;
-   logic [2:0] 	FOpCtrlE;   
-   logic [2:0] 	FrmE;
-   logic 	op_type;	
-   logic 	FmtE;   		
-   logic 	OvEn;		
-   logic 	UnEn;   	
-
-   logic 	XSgnE, YSgnE, ZSgnE;
-   logic 	XSgnM, YSgnM;     
-   logic [10:0] XExpE, YExpE, ZExpE;
-   logic [10:0] XExpM, YExpM, ZExpM;
-   logic [52:0] XManE, YManE, ZManE;
-   logic [52:0] XManM, YManM, ZManM;
-
-   logic [10:0] BiasE;
-   logic 	XNaNE, YNaNE, ZNaNE;           
-   logic 	XNaNM, YNaNM, ZNaNM;           
-   logic 	XSNaNE, YSNaNE, ZSNaNE;        
-   logic 	XSNaNM, YSNaNM, ZSNaNM;        
-   logic 	XDenormE, YDenormE, ZDenormE;  
-   logic 	XZeroE, YZeroE, ZZeroE;        
-   logic 	XZeroM, YZeroM, ZZeroM;        
-   logic 	XInfE, YInfE, ZInfE;           
-   logic 	XInfM, YInfM, ZInfM;
-   logic 	XExpMaxE;  
-   logic 	XNormE;
-   logic 	FDivBusyE;   
-    
-   logic 	start;
-   logic 	reset;
-
-   logic 	XDenorm;
-   logic 	YDenorm;   
-   logic [63:0] AS_Result;	
-   logic [4:0] 	Flags;   	
-   logic 	Denorm;   	
-   logic 	done;
-
-   logic         clk;
-   logic [63:0]  yexpected;
-   logic [63:0]  vectornum, errors;    // bookkeeping variables
-   logic [199:0] testvectors[50000:0]; // array of testvectors
-   logic [7:0] 	 flags_expected;
-
-   integer 	handle3;
-   integer 	desc3;  
-   
-   // instantiate device under test
-   unpack unpack(.X(op1), .Y(op2), .Z(64'h0), .FOpCtrlE, .FmtE, 
-		       .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-		       .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-		       .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
-   fpdiv fdivsqrt (.op1, .op2, .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]),
-		   .reset, .clk, .start, .P(~FmtE), .OvEn(1'b0), .UnEn(1'b0),
-		   .XNaNQ(XNaNE), .YNaNQ(YNaNE), .XInfQ(XInfE), .YInfQ(YInfE), .XZeroQ(XZeroE), .YZeroQ(YZeroE),
-		   .FDivBusyE, .done(done), .AS_Result(AS_Result), .Flags(Flags));
-
-
-   // current fpdivsqrt does not operation on denorms yet
-   assign Denorm = XDenormE | YDenormE | Flags[3];   
-
-  // generate clock to sequence tests
-  always
-    begin
-      clk = 1; # 5; clk = 0; # 5;
-    end
-   
-   initial
-     begin
-	handle3 = $fopen("f64_div_rne.out");
-	$readmemh("../testbench/fp/vectors/f64_div_rne.tv", testvectors);
-	vectornum = 0; errors = 0;
-	start = 1'b0;
-	// reset
-	reset = 1; #27; reset = 0;
-     end
-
-   initial
-     begin
-	desc3 = handle3;
-	// Operation (if applicable)
-	#0  op_type = 1'b0;
-	// Precision (32-bit or 64-bit)
-	#0  FmtE = 1'b1;
-	// From fctrl logic to dictate operation
-	#0  FOpCtrlE = 3'b000;
-	// Rounding Mode
-	#0  FrmE = 3'b000;
-	// Trap masking (n/a for RISC-V)
-	#0  OvEn = 1'b0;
-	#0  UnEn = 1'b0;
-     end
-
-   always @(posedge clk)
-     begin
-	if (~reset)
-	  begin
-	     #0; {op1, op2, yexpected, flags_expected} = testvectors[vectornum];
-	     #50 start = 1'b1;
-	     repeat (2)
-	       @(posedge clk);
-	     // deassert start after 2 cycles
-	     start = 1'b0;	
-	     repeat (10)
-	       @(posedge clk);
-	     $fdisplay(desc3, "%h_%h_%h_%b_%b | %h_%b", op1, op2, AS_Result, Flags, Denorm, yexpected, (AS_Result==yexpected));
-	     vectornum = vectornum + 1;
-	     if (testvectors[vectornum] === 200'bx) begin
-		$display("%d tests completed", vectornum);
-		$finish;
-	     end
-	  end // if (~reset)
-	$display("%d vectors processed", vectornum);
-     end // always @ (posedge clk)
-   
-endmodule // tb
-
-
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -17,6 +17,7 @@ module testbenchfp;
  string      FmaRnmTests[];  // list of FMA round to nearest max magnitude
  logic [2:0] OpCtrl[];       // list of op controls
  logic [2:0] Unit[];         // list of units being tested
+  logic WriteInt[];           // is the result being written to an integer register (one entry per op control)
  logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
  logic [1:0] Fmt[];          // list of formats for the other units
  logic [1:0] FmaFmt[];       // list of formats for the FMA
@ -37,6 +38,7 @@ module testbenchfp;

  logic [1:0]           FmaFmtVal, FmtVal;          // value of the current Fmt
  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
+  logic                 WriteIntVal;                // value of the current WriteInt
  logic [`FLEN-1:0]     X, Y, Z;                    // inputs read from TestFloat
  logic [`FLEN-1:0]     FmaRneX, FmaRneY, FmaRneZ;  // inputs read from TestFloat
  logic [`FLEN-1:0]     FmaRzX, FmaRzY, FmaRzZ;     // inputs read from TestFloat
@ -53,8 +55,9 @@ module testbenchfp;
  logic [4:0]	 	        ResFlg;                                                            // Result flags
  logic [4:0]           FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
  logic	[`FPSIZES/3:0]  ModFmt, FmaModFmt;  // format - 10 = half, 00 = single, 01 = double, 11 = quad
-  logic [`FLEN-1:0]     FmaRes, DivRes, CmpRes, CvtRes, CvtFpRes;  // Results from each unit
-  logic [4:0]           FmaFlg, CvtFpFlg, DivFlg, CvtIntFlg, CmpFlg;  // Outputed flags
+  logic [`FLEN-1:0]     FmaRes, DivRes, CmpRes, CvtRes;  // Results from each unit
+  logic [`XLEN-1:0]     CvtIntRes;  // Results from each unit
+  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
  logic                 ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN;   // is the outputed result NaN
  logic                 AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN;   // is the correct answer NaN
  logic                 NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
@ -108,7 +111,6 @@ module testbenchfp;
  logic                 FmaRdXZero, FmaRdYZero, FmaRdZZero;
  logic                 FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
  logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
-  logic                 ZOrigDenorm, FmaRneZOrigDenorm, FmaRzZOrigDenorm, FmaRuZOrigDenorm, FmaRdZOrigDenorm, FmaRnmZOrigDenorm; // is the original precision dnormalized

  // in-between FMA signals
  logic                 Mult;
@ -150,6 +152,7 @@ module testbenchfp;
                                              Tests = {Tests, f128rv32cvtint};
                                              // add the op-codes for these tests to the op-code list
                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                                              // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
@ -159,6 +162,7 @@ module testbenchfp;
                                                Tests = {Tests, f128rv64cvtint};
                                              // add the op-codes for these tests to the op-code list
                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                                              // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
@ -172,39 +176,55 @@ module testbenchfp;
          Tests = {Tests, f128f64cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b01, 3'b11};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b11};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b01};
+          end
        end
        if(`F_SUPPORTED) begin // if single precision is supported
          // add the 128 <-> 32 bit conversions to the to-be-tested list
          Tests = {Tests, f128f32cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b00, 3'b11};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b11};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b00};
+          end
        end
        if(`ZFH_SUPPORTED) begin // if half precision is supported
          // add the 128 <-> 16 bit conversions to the to-be-tested list
          Tests = {Tests, f128f16cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b10, 3'b11};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b11};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b10};
+          end
        end
      end
      if (TEST === "cmp"   | TEST === "all") begin// if comparisons are being tested
        // add the compare tests/op-ctrls/unit/fmt
        Tests = {Tests, f128cmp};
        OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+          WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
          for(int i = 0; i<15; i++) begin
            Unit = {Unit, `CMPUNIT};
            Fmt = {Fmt, 2'b11};
@ -214,6 +234,7 @@ module testbenchfp;
        // add the addition tests/op-ctrls/unit/fmt
        Tests = {Tests, f128add};
        OpCtrl = {OpCtrl, `ADD_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `FMAUNIT};
            Fmt = {Fmt, 2'b11};
@ -223,6 +244,7 @@ module testbenchfp;
        // add the subtraction tests/op-ctrls/unit/fmt
        Tests = {Tests, f128sub};
        OpCtrl = {OpCtrl, `SUB_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `FMAUNIT};
            Fmt = {Fmt, 2'b11};
@ -232,6 +254,7 @@ module testbenchfp;
        // add the multiply tests/op-ctrls/unit/fmt
        Tests = {Tests, f128mul};
        OpCtrl = {OpCtrl, `MUL_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `FMAUNIT};
            Fmt = {Fmt, 2'b11};
@ -241,6 +264,7 @@ module testbenchfp;
        // add the divide tests/op-ctrls/unit/fmt
        Tests = {Tests, f128div};
        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `DIVUNIT};
            Fmt = {Fmt, 2'b11};
@ -250,6 +274,7 @@ module testbenchfp;
        // add the square-root tests/op-ctrls/unit/fmt
        Tests = {Tests, f128sqrt};
        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `DIVUNIT};
            Fmt = {Fmt, 2'b11};
@ -264,9 +289,7 @@ module testbenchfp;
        FmaRdTests  = {FmaRdTests,  "f128_mulAdd_rd.tv"};
        FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
        // add the format for the Fma
-        for(int i = 0; i<5; i++) begin
-          FmaFmt = {FmaFmt, 2'b11};
-        end
+        FmaFmt = {FmaFmt, 2'b11};
      end
    end
    if (`D_SUPPORTED) begin // if double precision is supported
@ -274,6 +297,7 @@ module testbenchfp;
                                              Tests = {Tests, f64rv32cvtint};
                                              // add the op-codes for these tests to the op-code list
                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                                              // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
@ -281,9 +305,10 @@ module testbenchfp;
                                              end
                                              if (`XLEN == 64) begin // if 64-bit integers are being supported
                                                Tests = {Tests, f64rv64cvtint};
-                                              // add the op-codes for these tests to the op-code list
+                                                // add the op-codes for these tests to the op-code list
                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                              // add what unit is used and the fmt to their lists (one for each test)
+                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+                                                // add what unit is used and the fmt to their lists (one for each test)
                                                for(int i = 0; i<20; i++) begin
                                                  Unit = {Unit, `CVTINTUNIT};
                                                  Fmt = {Fmt, 2'b01};
@ -296,28 +321,39 @@ module testbenchfp;
          Tests = {Tests, f64f32cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b00, 3'b01};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b01};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b00};
+          end
        end
        if(`ZFH_SUPPORTED) begin // if half precision is supported
          // add the 64 <-> 16 bit conversions to the to-be-tested list
          Tests = {Tests, f64f16cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b10, 3'b01};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b01};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b10};
+          end
        end
      end
      if (TEST === "cmp"   | TEST === "all") begin // if comparisions are being tested
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64cmp};
        OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
        for(int i = 0; i<15; i++) begin
          Unit = {Unit, `CMPUNIT};
          Fmt = {Fmt, 2'b01};
@ -327,6 +363,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64add};
        OpCtrl = {OpCtrl, `ADD_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b01};
@ -336,6 +373,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64sub};
        OpCtrl = {OpCtrl, `SUB_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b01};
@ -345,6 +383,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64mul};
        OpCtrl = {OpCtrl, `MUL_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b01};
@ -354,6 +393,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64div};
        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b01};
@ -363,6 +403,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f64sqrt};
        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b01};
@ -376,9 +417,7 @@ module testbenchfp;
        FmaRuTests  = {FmaRuTests,  "f64_mulAdd_ru.tv"};
        FmaRdTests  = {FmaRdTests,  "f64_mulAdd_rd.tv"};
        FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
-        for(int i = 0; i<5; i++) begin
-          FmaFmt = {FmaFmt, 2'b01};
-        end
+        FmaFmt = {FmaFmt, 2'b01};
      end
    end
    if (`F_SUPPORTED) begin // if single precision being supported
@ -386,6 +425,7 @@ module testbenchfp;
                                              Tests = {Tests, f32rv32cvtint};
                                              // add the op-codes for these tests to the op-code list
                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                                              // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
@ -393,9 +433,10 @@ module testbenchfp;
                                              end
                                              if (`XLEN == 64) begin // if 64-bit integers are supported
                                                Tests = {Tests, f32rv64cvtint};
-                                              // add the op-codes for these tests to the op-code list
+                                                // add the op-codes for these tests to the op-code list
                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                              // add what unit is used and the fmt to their lists (one for each test)
+                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+                                                // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
                                                Fmt = {Fmt, 2'b00};
@ -408,17 +449,23 @@ module testbenchfp;
          Tests = {Tests, f32f16cvt};
          // add the op-ctrls (i.e. the format of the result)
          OpCtrl = {OpCtrl, 3'b10, 3'b00};
+          WriteInt = {WriteInt, 1'b0, 1'b0};
          // add the unit being tested and fmt (input format)
-          for(int i = 0; i<10; i++) begin
+          for(int i = 0; i<5; i++) begin
            Unit = {Unit, `CVTFPUNIT};
            Fmt = {Fmt, 2'b00};
          end
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `CVTFPUNIT};
+            Fmt = {Fmt, 2'b10};
+          end
        end
      end
      if (TEST === "cmp"   | TEST === "all") begin // if comparision is being tested
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32cmp};
        OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
        for(int i = 0; i<15; i++) begin
          Unit = {Unit, `CMPUNIT};
          Fmt = {Fmt, 2'b00};
@ -428,6 +475,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32add};
        OpCtrl = {OpCtrl, `ADD_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b00};
@ -437,6 +485,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32sub};
        OpCtrl = {OpCtrl, `SUB_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b00};
@ -446,6 +495,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32mul};
        OpCtrl = {OpCtrl, `MUL_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b00};
@ -455,6 +505,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32div};
        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b00};
@ -464,6 +515,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f32sqrt};
        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b00};
@ -473,13 +525,11 @@ module testbenchfp;
        // add each rounding mode to it's own list of tests
        //    - fma tests are very long, so run all rounding modes in parallel
        FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
-        // FmaRzTests  = {FmaRzTests,  "f32_mulAdd_rz.tv"};
-        // FmaRuTests  = {FmaRuTests,  "f32_mulAdd_ru.tv"};
-        // FmaRdTests  = {FmaRdTests,  "f32_mulAdd_rd.tv"};
-        // FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
-        // for(int i = 0; i<5; i++) begin
-          FmaFmt = {FmaFmt, 2'b00};
-        // end
+        FmaRzTests  = {FmaRzTests,  "f32_mulAdd_rz.tv"};
+        FmaRuTests  = {FmaRuTests,  "f32_mulAdd_ru.tv"};
+        FmaRdTests  = {FmaRdTests,  "f32_mulAdd_rd.tv"};
+        FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
+        FmaFmt = {FmaFmt, 2'b00};
      end
    end
    if (`ZFH_SUPPORTED) begin // if half precision supported
@ -487,26 +537,29 @@ module testbenchfp;
                                              Tests = {Tests, f16rv32cvtint};
                                              // add the op-codes for these tests to the op-code list
                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                                              // add what unit is used and the fmt to their lists (one for each test)
                                              for(int i = 0; i<20; i++) begin
                                                Unit = {Unit, `CVTINTUNIT};
                                                Fmt = {Fmt, 2'b10};
                                              end
                                              if (`XLEN == 64) begin // if 64-bit integers are supported
-                                              Tests = {Tests, f16rv64cvtint, f16rv32cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                              OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b10};
-                                              end
+                                                Tests = {Tests, f16rv64cvtint};
+                                                // add the op-codes for these tests to the op-code list
+                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+                                                // add what unit is used and the fmt to their lists (one for each test)
+                                                for(int i = 0; i<20; i++) begin
+                                                  Unit = {Unit, `CVTINTUNIT};
+                                                  Fmt = {Fmt, 2'b10};
+                                                end
                                              end
                                            end
      if (TEST === "cmp"   | TEST === "all") begin // if comparisions are being tested
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16cmp};
        OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
        for(int i = 0; i<15; i++) begin
          Unit = {Unit, `CMPUNIT};
          Fmt = {Fmt, 2'b10};
@ -516,6 +569,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16add};
        OpCtrl = {OpCtrl, `ADD_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b10};
@ -525,6 +579,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16sub};
        OpCtrl = {OpCtrl, `SUB_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b10};
@ -534,6 +589,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16mul};
        OpCtrl = {OpCtrl, `MUL_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `FMAUNIT};
          Fmt = {Fmt, 2'b10};
@ -543,6 +599,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16div};
        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b10};
@ -552,6 +609,7 @@ module testbenchfp;
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16sqrt};
        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
        for(int i = 0; i<5; i++) begin
          Unit = {Unit, `DIVUNIT};
          Fmt = {Fmt, 2'b10};
@ -561,13 +619,11 @@ module testbenchfp;
        // add each rounding mode to it's own list of tests
        //    - fma tests are very long, so run all rounding modes in parallel
        FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
-        // FmaRzTests  = {FmaRzTests,  "f16_mulAdd_rz.tv"};
-        // FmaRuTests  = {FmaRuTests,  "f16_mulAdd_ru.tv"};
-        // FmaRdTests  = {FmaRdTests,  "f16_mulAdd_rd.tv"};
-        // FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
-        // for(int i = 0; i<5; i++) begin
-          FmaFmt = {FmaFmt, 2'b10};
-        // end
+        FmaRzTests  = {FmaRzTests,  "f16_mulAdd_rz.tv"};
+        FmaRuTests  = {FmaRuTests,  "f16_mulAdd_ru.tv"};
+        FmaRdTests  = {FmaRdTests,  "f16_mulAdd_rd.tv"};
+        FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
+        FmaFmt = {FmaFmt, 2'b10};
      end
    end

@ -606,6 +662,7 @@ module testbenchfp;
  always_comb UnitVal = Unit[TestNum];
  always_comb FmtVal = Fmt[TestNum];
  always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
+  always_comb WriteIntVal = WriteInt[OpCtrlNum];
  always_comb FrmVal = Frm[FrmNum];
  assign Mult = OpCtrlVal === 3'b100;

@ -624,7 +681,7 @@ module testbenchfp;
                                    .XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
                                    .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
                                    .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), 
-                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .ZOrigDenormE(FmaRneZOrigDenorm),
+                                    .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
                                    .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), 
                                    .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), 
                                    .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
@ -634,7 +691,7 @@ module testbenchfp;
                                    .XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
                                    .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
                                    .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), 
-                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .ZOrigDenormE(FmaRzZOrigDenorm),
+                                    .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
                                    .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), 
                                    .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), 
                                    .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
@ -644,7 +701,7 @@ module testbenchfp;
                                    .XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
                                    .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
                                    .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), 
-                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .ZOrigDenormE(FmaRuZOrigDenorm),
+                                    .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
                                    .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), 
                                    .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), 
                                    .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
@ -654,7 +711,7 @@ module testbenchfp;
                                    .XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
                                    .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
                                    .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
-                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .ZOrigDenormE(FmaRdZOrigDenorm),
+                                    .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
                                    .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), 
                                    .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), 
                                    .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
@ -663,7 +720,7 @@ module testbenchfp;
  readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[VectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg), 
                                    .XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
                                    .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
-                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),  .ZOrigDenormE(FmaRnmZOrigDenorm),
+                                    .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
                                    .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
                                    .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), 
                                    .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), 
@ -673,7 +730,7 @@ module testbenchfp;
  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                    .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
-                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .ZOrigDenormE(ZOrigDenorm),
+                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
                                    .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                    .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
                                    .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), 
@ -699,13 +756,12 @@ module testbenchfp;
  fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), 
              .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), 
              .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
-              .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm),  
              .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ), 
              .NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
              .ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd)); 
  fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn), 
-              .ZExpM(FmaRneZExp), .ZOrigDenormM(FmaRneZOrigDenorm),
+              .ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
              .XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan), 
              .XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN), 
              .XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero), 
@ -718,13 +774,12 @@ module testbenchfp;
  fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), 
              .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), 
              .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
-              .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm),  
              .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ), 
              .NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
              .ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd)); 
  fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn), 
-              .ZExpM(FmaRzZExp),  .ZOrigDenormM(FmaRzZOrigDenorm),
+              .ZExpM(FmaRzZExp),  .ZDenormM(FmaRzZDenorm),
              .XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan), 
              .XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN), 
              .XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero), 
@ -737,13 +792,12 @@ module testbenchfp;
  fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), 
              .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), 
              .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
-              .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm),  
              .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ), 
              .NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
              .ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd)); 
  fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn), 
-              .ZExpM(FmaRuZExp),  .ZOrigDenormM(FmaRuZOrigDenorm),
+              .ZExpM(FmaRuZExp),  .ZDenormM(FmaRuZDenorm),
              .XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan), 
              .XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN), 
              .XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero), 
@ -755,14 +809,13 @@ module testbenchfp;
              .FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
  fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), 
              .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), 
-              .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
-              .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm),  
+              .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), 
              .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ), 
              .NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
              .ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd)); 
  fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn), 
-              .ZExpM(FmaRdZExp),  .ZOrigDenormM(FmaRdZOrigDenorm),
+              .ZExpM(FmaRdZExp),  .ZDenormM(FmaRdZDenorm),
              .XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan), 
              .XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN), 
              .XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero), 
@ -775,13 +828,12 @@ module testbenchfp;
  fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), 
              .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), 
              .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
-              .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm),  
              .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
              .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ), 
              .NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
              .ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd)); 
  fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn), 
-              .ZExpM(FmaRnmZExp),  .ZOrigDenormM(FmaRnmZOrigDenorm),
+              .ZExpM(FmaRnmZExp),  .ZDenormM(FmaRnmZDenorm),
              .XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan), 
              .XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN), 
              .XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero), 
@ -794,12 +846,11 @@ module testbenchfp;
  fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), 
              .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), 
              .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
-              .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm),  
              .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
              .ProdExpE, .AddendStickyE, .KillProdE); 
  fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn), 
-              .ZExpM(ZExp),  .ZOrigDenormM(ZOrigDenorm),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm),
              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), 
              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), 
              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), 
@ -809,7 +860,12 @@ module testbenchfp;
              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
              .FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
  // fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf), 
-  //             .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(Frmal), .FmtE(ModFmt), .CvtFpRes, .CvtFpFlgE);
+  //             .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
+  
+fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
+            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
+            .XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), 
+            .CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), 
              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
@ -901,38 +957,55 @@ module testbenchfp;
      AnsNaN = 1'b0;
      ResNaN = 1'b0;
    end
-    else begin
-      case (FmtVal)
+    else if (UnitVal === `CVTFPUNIT) begin
+      // fp -> fp conversion: OpCtrlVal[1:0] holds the result format, so the NaN test
+      // (exponent field all ones and fraction field nonzero) uses that format's field bounds
+      case (OpCtrlVal[1:0])
          4'b11: begin // quad             
-            AnsNaN = &Ans[`FLEN-2:`NF]&(|Ans[`NF-1:0]);
-            ResNaN = &FmaRes[`FLEN-2:`NF]&(|FmaRes[`NF-1:0]);
+            AnsNaN = &Ans[`Q_LEN-2:`Q_NF]&(|Ans[`Q_NF-1:0]);
+            ResNaN = &Res[`Q_LEN-2:`Q_NF]&(|Res[`Q_NF-1:0]);
          end
          4'b01: begin // double                 
-            AnsNaN = &Ans[`LEN1-2:`NF1]&(|Ans[`NF1-1:0]);
-            ResNaN = &FmaRes[`LEN1-2:`NF1]&(|FmaRes[`NF1-1:0]);
+            AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
+            ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
          end
          4'b00: begin // single
-            AnsNaN = &Ans[`LEN2-2:`NF2]&(|Ans[`NF2-1:0]);
-            ResNaN = &FmaRes[`LEN2-2:`NF2]&(|FmaRes[`NF2-1:0]);
+            AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
+            ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
          end
          4'b10: begin // half
            AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
-            ResNaN = &FmaRes[`H_LEN-2:`H_NF]&(|FmaRes[`H_NF-1:0]);
+            ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
+          end
+      endcase
+    end
+    else begin
+      case (FmtVal)
+          4'b11: begin // quad             
+            AnsNaN = &Ans[`Q_LEN-2:`Q_NF]&(|Ans[`Q_NF-1:0]);
+            ResNaN = &Res[`Q_LEN-2:`Q_NF]&(|Res[`Q_NF-1:0]);
+          end
+          4'b01: begin // double                 
+            AnsNaN = &Ans[`D_LEN-2:`D_NF]&(|Ans[`D_NF-1:0]);
+            ResNaN = &Res[`D_LEN-2:`D_NF]&(|Res[`D_NF-1:0]);
+          end
+          4'b00: begin // single
+            AnsNaN = &Ans[`S_LEN-2:`S_NF]&(|Ans[`S_NF-1:0]);
+            ResNaN = &Res[`S_LEN-2:`S_NF]&(|Res[`S_NF-1:0]);
+          end
+          4'b10: begin // half
+            AnsNaN = &Ans[`H_LEN-2:`H_NF]&(|Ans[`H_NF-1:0]);
+            ResNaN = &Res[`H_LEN-2:`H_NF]&(|Res[`H_NF-1:0]);
          end
      endcase
    end
  end
-
-  // check results on falling edge of clk
-  always @(negedge clk) begin
-
+always_comb begin
    // select the result to check
    case (UnitVal)
      `FMAUNIT: Res = FmaRes;
      `DIVUNIT: Res = DivRes;
      `CMPUNIT: Res = CmpRes;
-      `CVTINTUNIT: Res = CvtRes;
-      `CVTFPUNIT: Res = CvtFpRes;
+      `CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
+      `CVTFPUNIT: Res = CvtRes;
    endcase

    // select the flag to check
@ -940,9 +1013,13 @@ module testbenchfp;
      `FMAUNIT: ResFlg = FmaFlg;
      `DIVUNIT: ResFlg = DivFlg;
      `CMPUNIT: ResFlg = CmpFlg;
-      `CVTINTUNIT: ResFlg = CvtIntFlg;
-      `CVTFPUNIT: ResFlg = CvtFpFlg;
+      `CVTINTUNIT: ResFlg = CvtFlg;
+      `CVTFPUNIT: ResFlg = CvtFlg;
    endcase
+end
+  // check results on falling edge of clk
+  always @(negedge clk) begin
+

    // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
    //    - the sign of the NaN does not matter for the operations being tested
@ -1060,15 +1137,19 @@ module testbenchfp;
    else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format
      case (OpCtrlVal[1:0]) 
        2'b11: NaNGood = ((AnsFlg[4]&(Res[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
+                          (AnsNaN&(Res[`Q_LEN-2:0] === Ans[`Q_LEN-2:0])) | 
                          (XNaN&(Res[`Q_LEN-2:0] === {X[`Q_LEN-2:`Q_NF],1'b1,X[`Q_NF-2:0]})) | 
                          (YNaN&(Res[`Q_LEN-2:0] === {Y[`Q_LEN-2:`Q_NF],1'b1,Y[`Q_NF-2:0]})));
        2'b01: NaNGood = ((AnsFlg[4]&(Res[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
+                          (AnsNaN&(Res[`D_LEN-2:0] === Ans[`D_LEN-2:0])) | 
                          (XNaN&(Res[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | 
                          (YNaN&(Res[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})));
        2'b00: NaNGood = ((AnsFlg[4]&(Res[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
+                          (AnsNaN&(Res[`S_LEN-2:0] === Ans[`S_LEN-2:0])) | 
                          (XNaN&(Res[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | 
                          (YNaN&(Res[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})));
        2'b10: NaNGood = ((AnsFlg[4]&(Res[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
+                          (AnsNaN&(Res[`H_LEN-2:0] === Ans[`H_LEN-2:0])) | 
                          (XNaN&(Res[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | 
                          (YNaN&(Res[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})));
      endcase
@ -1086,15 +1167,23 @@ module testbenchfp;
  ///////////////////////////////////////////////////////////////////////////////////////////////

    // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
      errors += 1;
      $display("There is an error in %s", Tests[TestNum]);
      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
      $stop;
    end
-    // in The RISC-V Instruction Set Manual (2019) section 11.8 specifies that
-    // if a any of the inputs to the EQ LT LE opperations then the opperation should return a 0
-    else if ((UnitVal === `CMPUNIT)&(XNaN|YNaN)&(Res !== (`FLEN)'(0))) begin
+    
+    // TestFloat sets the result to all 1's when there is an invalid result, however in 
+    // http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says
+    // for an unsigned integer result 0 is also okay
+
+    // TestFloat outputs 800... as the largest integer value for both positive and negative numbers, but 
+    // the RISC-V spec specifies 2^31-1 for positive values out of range and NaNs, i.e. 7fff...
+    else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) | 
+            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | 
+            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | 
+            (Res === Ans | NaNGood | NaNGood === 1'bx)) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
      errors += 1;
      $display("There is an error in %s", Tests[TestNum]);
      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -1147,6 +1236,8 @@ module testbenchfp;
      // increment the test
      TestNum += 1;

+      // clear the vectors
+      for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
      // read next files
      $readmemh({`PATH, Tests[TestNum]}, TestVectors);
      $readmemh({`PATH, FmaRneTests[TestNum]}, FmaRneVectors);
@ -1197,7 +1288,6 @@ module readfmavectors (
  input logic [1:0]           FmaFmt,                 // the format of the FMA inputs
  input logic [`FLEN*4+7:0]   TestVector,             // the test vector
  output logic [`FLEN-1:0]    Ans,                    // the correct answer
-  output logic                ZOrigDenormE,           // is z denormalized in it's original precision
  output logic [4:0]          AnsFlg,                 // the correct flag
  output logic                XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
  output logic [`NE-1:0]      XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
@ -1244,10 +1334,10 @@ module readfmavectors (
    endcase
  end
  
-  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
+  unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
-                .XExpMaxE, .ZOrigDenormE);
+                .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
+                .XExpMaxE, .ZDenormE);
 endmodule


@ -1287,7 +1377,6 @@ module readvectors (
  output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
  output logic XNormE, XExpMaxE,
-  output logic ZOrigDenormE,
  output logic [`FLEN-1:0] X, Y, Z
 );

@ -1371,7 +1460,7 @@ module readvectors (
            Ans = TestVector[8];
          end
          2'b10:	begin	  // half
-            X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]};
+            X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]};
            Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]};
            Ans = TestVector[8];
          end
@ -1464,89 +1553,105 @@ module readvectors (
        case (Fmt)
          2'b11: begin       // quad
            //     {is the conversion int -> fp,     is the integer a long}
-            casex ({OpCtrl[2], OpCtrl[0]})
+            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> quad
+                X = {`FLEN{1'bx}};
                SrcA = TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)];
                Ans = TestVector[8+(`Q_LEN-1):8];
              end
-              2'b01:	begin	  // int -> quad
+              2'b10:	begin	  // int -> quad
                // sign extend the 32-bit integer to XLEN bits (done for signed and unsigned tests alike)
-                SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`Q_LEN+`XLEN-1:8+(`Q_LEN)]};
+                X = {`FLEN{1'bx}};
+                SrcA = {{`XLEN-32{TestVector[8+`Q_LEN+32-1]}}, TestVector[8+`Q_LEN+32-1:8+(`Q_LEN)]};
                Ans = TestVector[8+(`Q_LEN-1):8];
              end
-              2'b10:	begin	  // quad -> long
+              2'b01:	begin	  // quad -> long
                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
+                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
-              2'b00:	begin	  // double -> long
-                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
-                Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
+              2'b00:	begin	  // quad -> int
+                X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
+                SrcA = {`XLEN{1'bx}};
+                Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
              end
            endcase
          end
          2'b01:	begin	  // double
-            //     {is the integer a long,     is the opperation to an integer}
-            casex ({OpCtrl[2], OpCtrl[0]})
+            //     {Int->Fp?, is the integer a long}
+            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> double
+                X = {`FLEN{1'bx}};
                SrcA = TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)];
-                Ans = TestVector[8+(`D_LEN-1):8];
+                Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
              end
-              2'b01:	begin	  // int -> double
+              2'b10:	begin	  // int -> double
                // sign extend the 32-bit integer to XLEN bits (done for signed and unsigned tests alike)
-                SrcA = {{`XLEN-32{TestVector[8+`D_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`D_LEN+`XLEN-1:8+(`D_LEN)]};
-                Ans = TestVector[8+(`D_LEN-1):8];
+                X = {`FLEN{1'bx}};
+                SrcA = {{`XLEN-32{TestVector[8+`D_LEN+32-1]}}, TestVector[8+`D_LEN+32-1:8+(`D_LEN)]};
+                Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
              end
-              2'b10:	begin	  // double -> long
+              2'b01:	begin	  // double -> long
                X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
+                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
              2'b00:	begin	  // double -> int
-                X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`XLEN+`D_LEN-1:8+(`XLEN)]};
-                Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
+                X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+32+`D_LEN-1:8+(32)]};
+                SrcA = {`XLEN{1'bx}};
+                Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
              end
            endcase
          end
          2'b00:	begin	  // single
            //     {is the conversion int -> fp,     is the integer a long}
-            casex ({OpCtrl[2], OpCtrl[0]})
+            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> single
+                X = {`FLEN{1'bx}};
                SrcA = TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)];
-                Ans = TestVector[8+(`S_LEN-1):8];
+                Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
              end
-              2'b01:	begin	  // int -> single
+              2'b10:	begin	  // int -> single
                // sign extend the 32-bit integer to XLEN bits (done for signed and unsigned tests alike)
-                SrcA = {{`XLEN-32{TestVector[8+`S_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`S_LEN+`XLEN-1:8+(`S_LEN)]};
-                Ans = TestVector[8+(`S_LEN-1):8];
+                X = {`FLEN{1'bx}};
+                SrcA = {{`XLEN-32{TestVector[8+`S_LEN+32-1]}}, TestVector[8+`S_LEN+32-1:8+(`S_LEN)]};
+                Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
              end
-              2'b10:	begin	  // single -> long
+              2'b01:	begin	  // single -> long
                X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
+                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
              2'b00:	begin	  // single -> int
-                X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`XLEN+`S_LEN-1:8+(`XLEN)]};
-                Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}},TestVector[8+(`XLEN-1):8]};
+                X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+32+`S_LEN-1:8+(32)]};
+                SrcA = {`XLEN{1'bx}};
+                Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
              end
            endcase
          end
          2'b10:	begin	  // half
            //     {is the conversion int -> fp,     is the integer a long}
-            casex ({OpCtrl[2], OpCtrl[0]})
+            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> half
+                X = {`FLEN{1'bx}};
                SrcA = TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)];
-                Ans = TestVector[8+(`H_LEN-1):8];
+                Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
              end
-              2'b01:	begin	  // int -> half
+              2'b10:	begin	  // int -> half
                // sign extend the 32-bit integer to XLEN bits (done for signed and unsigned tests alike)
-                SrcA = {{`XLEN-32{TestVector[8+`H_LEN+`XLEN]&~OpCtrl[1]}}, TestVector[8+`H_LEN+`XLEN-1:8+(`H_LEN)]};
-                Ans = TestVector[8+(`H_LEN-1):8];
+                X = {`FLEN{1'bx}};
+                SrcA = {{`XLEN-32{TestVector[8+`H_LEN+32-1]}}, TestVector[8+`H_LEN+32-1:8+(`H_LEN)]};
+                Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
              end
-              2'b10:	begin	  // half -> long
+              2'b01:	begin	  // half -> long
                X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
+                SrcA = {`XLEN{1'bx}};
                Ans = {TestVector[8+(`XLEN-1):8]};
              end
              2'b00:	begin	  // half -> int
-                X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`XLEN+`H_LEN-1:8+(`XLEN)]};
-                Ans = {{`XLEN-32{TestVector[8+`XLEN]&~OpCtrl[1]}}, TestVector[8+(`XLEN-1):8]};
+                X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+32+`H_LEN-1:8+(32)]};
+                SrcA = {`XLEN{1'bx}};
+                Ans = {{`XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
              end
            endcase
          end
@ -1557,5 +1662,5 @@ module readvectors (
  unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
-                .XExpMaxE, .ZOrigDenormE);
+                .XExpMaxE);
 endmodule
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -87,7 +87,7 @@ logic [3:0] dummy;
        "arch64m":      if (`M_SUPPORTED) tests = arch64m;
        "arch64d":      if (`D_SUPPORTED) tests = arch64d;
        "imperas64i":                     tests = imperas64i;
-//        "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
+        //"imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
        "imperas64f":   if (`F_SUPPORTED) tests = imperas64f;
        "imperas64d":   if (`D_SUPPORTED) tests = imperas64d;
        "imperas64m":   if (`M_SUPPORTED) tests = imperas64m;
@ -110,7 +110,7 @@ logic [3:0] dummy;
        "arch32m":      if (`M_SUPPORTED) tests = arch32m;
        "arch32f":      if (`F_SUPPORTED) tests = arch32f;
        "imperas32i":                     tests = imperas32i;
-//        "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
+        //"imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
        "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
        "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
        "wally32a":     if (`A_SUPPORTED) tests = wally32a;
@ -183,7 +183,7 @@ logic [3:0] dummy;

      // read test vectors into memory
      pathname = tvpaths[tests[0].atoi()];
-/*      if (tests[0] == `IMPERASTEST)
+      /* if (tests[0] == `IMPERASTEST)
        pathname = tvpaths[0];
      else pathname = tvpaths[1]; */
      memfilename = {pathname, tests[test], ".elf.memfile"};
@ -255,7 +255,7 @@ logic [3:0] dummy;
          //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] &
 	      (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
-//            if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
+              // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
              // report errors unless they are garbage at the end of the sim
              // kind of hacky test for garbage right now
              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
@ -368,11 +368,12 @@ module riscvassertions;
 	  assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
    assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
    assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
-//    assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
+    //    assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
    assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
    assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
    //assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
    //assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");    
+    assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
  end
 endmodule

@ -408,47 +409,45 @@ module DCacheFlushFSM
 	  logic 			 CacheValid  [numways-1:0] [numlines-1:0] [numwords-1:0];
 	  logic 			 CacheDirty  [numways-1:0] [numlines-1:0] [numwords-1:0];
 	  logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
-      for(index = 0; index < numlines; index++) begin
-		for(way = 0; way < numways; way++) begin
-		  for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
-			copyShadow #(.tagstart(tagstart),
-						 .loglinebytelen(loglinebytelen))
-			copyShadow(.clk,
-					   .start,
-					   .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
-					   .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
-					   .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
-					   .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
-					   .index(index),
-					   .cacheWord(cacheWord),
-					   .CacheData(CacheData[way][index][cacheWord]),
-					   .CacheAdr(CacheAdr[way][index][cacheWord]),
-					   .CacheTag(CacheTag[way][index][cacheWord]),
-					   .CacheValid(CacheValid[way][index][cacheWord]),
-					   .CacheDirty(CacheDirty[way][index][cacheWord]));
-		  end
-		end
+    for(index = 0; index < numlines; index++) begin
+		  for(way = 0; way < numways; way++) begin
+		    for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
+			    copyShadow #(.tagstart(tagstart),
+					.loglinebytelen(loglinebytelen))
+			    copyShadow(.clk,
+          .start,
+          .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
+          .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
+          .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
+          .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
+          .index(index),
+          .cacheWord(cacheWord),
+          .CacheData(CacheData[way][index][cacheWord]),
+          .CacheAdr(CacheAdr[way][index][cacheWord]),
+          .CacheTag(CacheTag[way][index][cacheWord]),
+          .CacheValid(CacheValid[way][index][cacheWord]),
+          .CacheDirty(CacheDirty[way][index][cacheWord]));
+        end
      end
+    end

-	  integer i, j, k;
+    integer i, j, k;

-	  always @(posedge clk) begin
-		if (start) begin #1
-		  #1
-			for(i = 0; i < numlines; i++) begin
-			  for(j = 0; j < numways; j++) begin
-				for(k = 0; k < numwords; k++) begin
-				  if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
-					ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
-				  end
-				end	
-			  end
-			end
-		end
-	  end
-
-	  
-	end
+    always @(posedge clk) begin
+      if (start) begin #1
+        #1
+        for(i = 0; i < numlines; i++) begin
+          for(j = 0; j < numways; j++) begin
+          for(k = 0; k < numwords; k++) begin
+            if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
+            ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
+            end
+          end	
+          end
+        end
+      end
+    end  
+  end
  flop #(1) doneReg(.clk, .d(start), .q(done));
 endmodule

--- a/pipelined/testbench/tests-fp.vh
+++ b/pipelined/testbench/tests-fp.vh
@ -34,14 +34,14 @@
 `define LE_OPCTRL 3'b011
 `define LT_OPCTRL 3'b001
 `define EQ_OPCTRL 3'b010
-`define TO_UI_OPCTRL   3'b011
+`define TO_UI_OPCTRL   3'b000
 `define TO_I_OPCTRL    3'b001
-`define TO_UL_OPCTRL   3'b111
-`define TO_L_OPCTRL    3'b101
-`define FROM_UI_OPCTRL 3'b010
-`define FROM_I_OPCTRL  3'b000
+`define TO_UL_OPCTRL   3'b010
+`define TO_L_OPCTRL    3'b011
+`define FROM_UI_OPCTRL 3'b100
+`define FROM_I_OPCTRL  3'b101
 `define FROM_UL_OPCTRL 3'b110
-`define FROM_L_OPCTRL  3'b100
+`define FROM_L_OPCTRL  3'b111
 `define RNE 3'b000
 `define RZ  3'b001
 `define RU  3'b011
@ -54,16 +54,6 @@
 `define CMPUNIT 4

 string f16rv32cvtint[] = '{
-	"f16_to_i32_rne.tv",
-	"f16_to_i32_rz.tv",
-	"f16_to_i32_ru.tv",
-	"f16_to_i32_rd.tv",
-	"f16_to_i32_rnm.tv",
-	"f16_to_ui32_rne.tv",
-	"f16_to_ui32_rz.tv",
-	"f16_to_ui32_ru.tv",
-	"f16_to_ui32_rd.tv",
-	"f16_to_ui32_rnm.tv",
 	"ui32_to_f16_rne.tv",
 	"ui32_to_f16_rz.tv",
 	"ui32_to_f16_ru.tv",
@ -73,20 +63,20 @@ string f16rv32cvtint[] = '{
 	"i32_to_f16_rz.tv",
 	"i32_to_f16_ru.tv",
 	"i32_to_f16_rd.tv",
-	"i32_to_f16_rnm.tv"
+	"i32_to_f16_rnm.tv",
+	"f16_to_ui32_rne.tv",
+	"f16_to_ui32_rz.tv",
+	"f16_to_ui32_ru.tv",
+	"f16_to_ui32_rd.tv",
+	"f16_to_ui32_rnm.tv",
+	"f16_to_i32_rne.tv",
+	"f16_to_i32_rz.tv",
+	"f16_to_i32_ru.tv",
+	"f16_to_i32_rd.tv",
+	"f16_to_i32_rnm.tv"
 };

 string f16rv64cvtint[] = '{
-	"f16_to_ui64_rne.tv",
-	"f16_to_ui64_rz.tv",
-	"f16_to_ui64_ru.tv",
-	"f16_to_ui64_rd.tv",
-	"f16_to_ui64_rnm.tv",
-	"f16_to_i64_rne.tv",
-	"f16_to_i64_rz.tv",
-	"f16_to_i64_ru.tv",
-	"f16_to_i64_rd.tv",
-	"f16_to_i64_rnm.tv",
 	"ui64_to_f16_rne.tv",
 	"ui64_to_f16_rz.tv",
 	"ui64_to_f16_ru.tv",
@ -96,7 +86,17 @@ string f16rv64cvtint[] = '{
 	"i64_to_f16_rz.tv",
 	"i64_to_f16_ru.tv",
 	"i64_to_f16_rd.tv",
-	"i64_to_f16_rnm.tv"
+	"i64_to_f16_rnm.tv",
+	"f16_to_ui64_rne.tv",
+	"f16_to_ui64_rz.tv",
+	"f16_to_ui64_ru.tv",
+	"f16_to_ui64_rd.tv",
+	"f16_to_ui64_rnm.tv",
+	"f16_to_i64_rne.tv",
+	"f16_to_i64_rz.tv",
+	"f16_to_i64_ru.tv",
+	"f16_to_i64_rd.tv",
+	"f16_to_i64_rnm.tv"
 };

 string f32rv32cvtint[] = '{
@ -307,16 +307,16 @@ string f128f32cvt[] = '{


 string f128f64cvt[] = '{
-	"f64_to_f128_rne.tv",
-	"f64_to_f128_rz.tv",
-	"f64_to_f128_ru.tv",
-	"f64_to_f128_rd.tv",
-	"f64_to_f128_rnm.tv",
 	"f128_to_f64_rne.tv",
 	"f128_to_f64_rz.tv",
 	"f128_to_f64_ru.tv",
 	"f128_to_f64_rd.tv",
-	"f128_to_f64_rnm.tv"
+	"f128_to_f64_rnm.tv",
+	"f64_to_f128_rne.tv",
+	"f64_to_f128_rz.tv",
+	"f64_to_f128_ru.tv",
+	"f64_to_f128_rd.tv",
+	"f64_to_f128_rnm.tv"
 };

 string f16add[] = '{
--- a/synthDC/.synopsys_dc.setup
+++ b/synthDC/.synopsys_dc.setup
@ -17,6 +17,9 @@ if {$tech == "sky130"} {
 } elseif {$tech == "sky90"} {
    set s9lib $timing_lib/sky90/sky90_sc/V1.7.4/lib
    lappend search_path $s9lib
+} elseif {$tech == "tsmc28"} {
+    set s10lib /proj/models/tsmc28/libraries/28nmtsmc/tcbn28hpcplusbwp30p140_190a/TSMCHOME/digital/Front_End/timing_power_noise/NLDM/tcbn28hpcplusbwp30p140_180a
+    lappend search_path $s10lib
 }

 # Synthetic libraries
@ -30,6 +33,8 @@ if {$tech == "sky130"} {
    lappend target_library $s8lib/sky130_osu_sc_12T_ms_TT_1P8_25C.ccs.db
 } elseif {$tech == "sky90"} {
    lappend target_library $s9lib/scc9gena_tt_1.2v_25C.db
+} elseif {$tech == "tsmc28"} {
+    lappend target_library $s10lib/tcbn28hpcplusbwp30p140tt0p9v25c.db
 }

 # Set Link Library
--- a/synthDC/ppaAnalyze.py
+++ b/synthDC/ppaAnalyze.py
@ -1,111 +1,135 @@
 #!/usr/bin/python3
 # Madeleine Masser-Frye mmasserfrye@hmc.edu 5/22

-from distutils.log import error
-from statistics import median
+from operator import index
 import subprocess
-import statistics
 import csv
 import re
+from matplotlib.cbook import flatten
 import matplotlib.pyplot as plt
 import matplotlib.lines as lines
+import matplotlib.axes as axes
 import numpy as np
+from collections import namedtuple


-def getData(mod=None, width=None):
-    specStr = ''
-    if mod != None:
-        specStr = mod
-        if width != None:
-            specStr += ('_'+str(width))
-    specStr += '*'
+def synthsfromcsv(filename):
+    ''' reads a CSV with one synthesis per row into the global list allSynths
+        each row becomes a Synth namedtuple (module tech width freq delay area lpower denergy)
+        fields that parse as int or float are converted, anything else stays a string
+        blank rows are skipped; a header row, if present, is kept as a Synth of strings
+    '''
+    Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy")
+
+    def convert(field):
+        # try int before float so widths and target freqs stay integers
+        for cast in (int, float):
+            try: return cast(field)
+            except ValueError: pass
+        return field
+
+    global allSynths
+    with open(filename, newline='') as csvfile:
+        # convert each row by its own length rather than assuming every row is as long as the first
+        allSynths = [Synth(*[convert(field) for field in row]) for row in csv.reader(csvfile) if row]
+    
+def synthsintocsv():
+    ''' writes a CSV with one line for every available synthesis
+        each line contains the module, tech, width, target freq, and resulting metrics
+    '''
+    print("This takes a moment...")
+    bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
+    output = subprocess.check_output(['bash','-c', bashCommand])
+    allSynths = output.decode("utf-8").split('\n')[:-1]

-    bashCommand = "grep 'Critical Path Length' runs/ppa_{}/reports/*qor*".format(specStr)
-    outputCPL = subprocess.check_output(['bash','-c', bashCommand])
-    linesCPL = outputCPL.decode("utf-8").split('\n')[:-1]
+    specReg = re.compile(r'[a-zA-Z0-9]+')
+    # raw string so \d and \. reach the regex engine without relying on Python passing unknown escapes through
+    metricReg = re.compile(r'\d+\.\d+[e]?[-+]?\d*')

-    bashCommand = "grep 'Design Area' runs/ppa_{}/reports/*qor*".format(specStr)
-    outputDA = subprocess.check_output(['bash','-c', bashCommand])
-    linesDA = outputDA.decode("utf-8").split('\n')[:-1]
-
-    bashCommand = "grep '100' runs/ppa_{}/reports/*power*".format(specStr)
-    outputP = subprocess.check_output(['bash','-c', bashCommand])
-    linesP = outputP.decode("utf-8").split('\n')[:-1]
-
-    cpl = re.compile('\d{1}\.\d{6}')
-    f = re.compile('_\d*_MHz')
-    wm = re.compile('ppa_\w*_\d*_qor')
-    da = re.compile('\d*\.\d{6}')
-    p = re.compile('\d+\.\d+[e-]*\d+')
-
-    allSynths = []
-    for i in range(len(linesCPL)):
-        line = linesCPL[i]
-        mwm = wm.findall(line)[0][4:-4].split('_')
-        freq = int(f.findall(line)[0][1:-4])
-        delay = float(cpl.findall(line)[0])
-        area = float(da.findall(linesDA[i])[0])
-        mod = mwm[0]
-        width = int(mwm[1])
-
-        power = p.findall(linesP[i])
-        lpower = float(power[2])
-        denergy = float(power[1])*delay
-
-        oneSynth = [mod, width, freq, delay, area, lpower, denergy]
-        allSynths += [oneSynth]
-
-    return allSynths
-
-def getVals(module, var, freq=None):
-    allSynths = getData(mod=module)
-
-    if (var == 'delay'):
-        ind = 3 
-        units = " (ns)"
-    elif (var == 'area'):
-        ind = 4
-        units = " (sq microns)"
-    elif (var == 'lpower'):
-        ind = 5
-        units = " (nW)"
-    elif (var == 'denergy'):
-        ind = 6
-        units = " (pJ)"
-    else:
-        error
-
-    widths = []
-    metric = []
-    if (freq != None):
-        for oneSynth in allSynths:
-            if (oneSynth[2] == freq):
-                widths += [oneSynth[1]]
-                metric += [oneSynth[ind]]
-    else:
-        widths = [8, 16, 32, 64, 128]
-        for w in widths:
-            m = 10000 # large number to start
-            for oneSynth in allSynths:
-                if (oneSynth[1] == w):
-                    if (oneSynth[3] < m): 
-                        m = oneSynth[3]
-                        met = oneSynth[ind]
-            metric += [met]
-    return widths, metric, units
-
-def writeCSV():
-    allSynths = getData()
    file = open("ppaData.csv", "w")
    writer = csv.writer(file)
-    writer.writerow(['Module', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])
+    writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (mJ)'])

-    for one in allSynths:
-        writer.writerow(one)
+    for oneSynth in allSynths:
+        # the run path is split into alphanumeric tokens; tokens 2-6 are taken as module, width, risc, tech, freq
+        module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7]
+        tech = tech[:-2] # drops the last two characters of the tech token (presumably an "nm" suffix -- confirm against run names)
+        metrics = []
+        reportsFound = True
+        for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
+            bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
+            bashCommand = bashCommand.format(*phrase)
+            try: output = subprocess.check_output(['bash','-c', bashCommand])
+            except subprocess.CalledProcessError:
+                # grep failed, so there is no output belonging to this run to parse
+                print("At least one synth run doesn't have reports, try cleanup() first")
+                reportsFound = False
+                break
+            nums = metricReg.findall(str(output))
+            nums = [float(m) for m in nums]
+            metrics += nums
+        if not reportsFound:
+            continue # skip this run rather than writing a row built from a previous run's grep output
+        delay = metrics[0]
+        area = metrics[1]
+        lpower = metrics[4]
+        denergy = (metrics[2] + metrics[3])*delay # (switching + internal powers)*delay
+
+        writer.writerow([module, tech, width, freq, delay, area, lpower, denergy])
    file.close()

-def genLegend(fits, coefs, module, r2):
+def cleanup():
+    ''' removes runs that didn't work
+        first deletes every run whose qor report contains "Error"
+        then deletes every run directory in which grep for one of the required metrics fails
+    '''
+    bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*'
+    try:
+        output = subprocess.check_output(['bash','-c', bashCommand])
+        errorLines = output.decode("utf-8").split('\n')[:-1]
+    except subprocess.CalledProcessError:
+        errorLines = [] # grep exits nonzero when nothing matches or no report exists
+
+    # a report can hold several "Error" lines, so collapse them to one entry per run before deleting
+    failedRuns = list(dict.fromkeys(line.split('MHz')[0] for line in errorLines))
+    for run in failedRuns:
+        bc = 'rm -r '+ run + '*'
+        try: subprocess.check_output(['bash','-c', bc])
+        except subprocess.CalledProcessError: pass # best effort: one failed rm must not stop the remaining deletions
+
+    bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
+    output = subprocess.check_output(['bash','-c', bashCommand])
+    allSynths = output.decode("utf-8").split('\n')[:-1]
+    for oneSynth in allSynths:
+        for phrase in [['Path Length', 'qor'], ['Design Area', 'qor'], ['100', 'power']]:
+            bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
+            bashCommand = bashCommand.format(*phrase)
+            try: subprocess.check_output(['bash','-c', bashCommand])
+            except subprocess.CalledProcessError:
+                bc = 'rm -r '+ oneSynth[2:]
+                try: subprocess.check_output(['bash','-c', bc])
+                except subprocess.CalledProcessError: pass
+                break # removal was attempted; checking the remaining phrases would only repeat it
+    print("All cleaned up!")
+
+def getVals(tech, module, var, freq=None):
+    ''' for a specified tech, module, and variable/metric
+        returns a list of values for that metric in ascending width order
+        works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width
+        (only syntheses whose delay met their own target are considered; a width with no such synthesis is left out of the list)
+        reads the globals allSynths and widths; area is halved for flop modules and denergy is multiplied by 1000
+    '''
+
+    global widths
+    metric = []
+    widthL = []
+
+    if freq is not None:
+        for oneSynth in allSynths:
+            if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module):
+                widthL += [oneSynth.width]
+                osdict = oneSynth._asdict()
+                metric += [osdict[var]]
+        metric = [x for _, x in sorted(zip(widthL, metric))] # ordering
+    else:
+        for w in widths:
+            m = 100000 # large number to start
+            met = None # reset for every width so a width without a match can't reuse the previous width's value
+            for oneSynth in allSynths:
+                if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module):
+                    if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq):
+                        m = oneSynth.delay
+                        osdict = oneSynth._asdict()
+                        met = osdict[var]
+            if met is not None:
+                metric += [met]
+
+    if ('flop' in module) & (var == 'area'):
+        metric = [m/2 for m in metric] # since two flops in each module 
+    if (var == 'denergy'):
+        metric = [m*1000 for m in metric] # more practical units for regression coefs
+
+    return metric
+
+def genLegend(fits, coefs, r2, spec):
+    ''' generates a list of two legend elements 
+        labels line with fit equation and dots with tech and r squared of the fit
+    '''

    coefsr = [str(round(c, 3)) for c in coefs]

@ -127,26 +151,17 @@ def genLegend(fits, coefs, module, r2):
        eq += " + " + coefsr[ind] + "*Nlog2(N)"
        ind += 1

-    legend_elements = [lines.Line2D([0], [0], color='orange', label=eq),
-                       lines.Line2D([0], [0], color='steelblue', ls='', marker='o', label=' R^2='+ str(round(r2, 4)))]
+    legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq),
+                       lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +'  $R^2$='+ str(round(r2, 4)))]
    return legend_elements

-def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
+def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True):
+    ''' module: string module name
+        freq: int freq (MHz)
+        var: string delay, area, lpower, or denergy
+        fits: constant, linear, square, log2, Nlog2
+        plots given variable vs width for all matching syntheses with regression
    '''
-    module: string module name
-    freq: int freq (MHz)
-    var: string delay, area, lpower, or denergy
-    fits: constant, linear, square, log2, Nlog2
-    plots chosen variable vs width for all matching syntheses with regression
-    '''
-    widths, metric, units = getVals(module, var, freq=freq)
-    coefs, r2, funcArr = regress(widths, metric, fits)
-
-    xp = np.linspace(8, 140, 200)
-    pred = []
-    for x in xp:
-        y = [func(x) for func in funcArr]
-        pred += [sum(np.multiply(coefs, y))]

    if ax is None:
        singlePlot = True
@ -154,21 +169,48 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'):
    else:
        singlePlot = False

-    ax.scatter(widths, metric)
-    ax.plot(xp, pred, color='orange')
+    fullLeg = []
+    global techSpecs
+    global widths

-    legend_elements = genLegend(fits, coefs, module, r2)
-    ax.legend(handles=legend_elements)
+    global norms
+
+    for spec in techSpecs:
+        metric = getVals(spec.tech, module, var, freq=freq)
+        
+        if norm:
+            techdict = spec._asdict()
+            norm = techdict[var]
+            metric = [m/norm for m in metric] # comment out to not normalize
+
+        if len(metric) == 5:
+            xp, pred, leg = regress(widths, metric, spec, fits)
+            fullLeg += leg
+
+            ax.scatter(widths, metric, color=spec.color, marker=spec.shape)
+            ax.plot(xp, pred, color=spec.color)
+
+    ax.legend(handles=fullLeg)

    ax.set_xticks(widths)
    ax.set_xlabel("Width (bits)")
-    ax.set_ylabel(str.title(var) + units)
+
+    if norm:
+        ylabeldic = {"lpower": "Normalized Leakage Power", "denergy": "Normalized Dynamic Energy", "area": "INVx1 Areas", "delay": "FO4 Delays"}
+    else:
+        ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ-CHECK)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
+
+    ax.set_ylabel(ylabeldic[var])

    if singlePlot:
-        ax.set_title(module + "  (target  " + str(freq) + "MHz)")
+        titleStr = "  (target  " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)"
+        ax.set_title(module + titleStr)
        plt.show()

-def regress(widths, var, fits='clsgn'):
+def regress(widths, var, spec, fits='clsgn'):
+    ''' fits a curve to the given points
+        returns lists of x and y values to plot that curve and legend elements with the equation
+    '''

    funcArr = genFuncs(fits)

@ -187,9 +229,22 @@ def regress(widths, var, fits='clsgn'):
    except:
        resid = 0
    r2 = 1 - resid / (y.size * y.var())
-    return coefs, r2, funcArr

-def makeCoefTable():
+    xp = np.linspace(8, 140, 200)
+    pred = []
+    for x in xp:
+        n = [func(x) for func in funcArr]
+        pred += [sum(np.multiply(coefs, n))]
+
+    leg = genLegend(fits, coefs, r2, spec)
+
+    return xp, pred, leg
+
+def makeCoefTable(tech):
+    ''' not currently in use, may salvage later
+        writes CSV with each line containing the coefficients for a regression fit 
+        to a particular combination of module, metric, and target frequency
+    '''
    file = open("ppaFitting.csv", "w")
    writer = csv.writer(file)
    writer.writerow(['Module', 'Metric', 'Freq', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2'])
@ -198,7 +253,8 @@ def makeCoefTable():
        for comb in [['delay', 5000], ['area', 5000], ['area', 10]]:
            var = comb[0]
            freq = comb[1]
-            widths, metric, units = getVals(mod, freq, var)
+            # getVals takes (tech, module, var, freq): the metric name comes before the target frequency
+            metric = getVals(tech, mod, var, freq=freq)
+            # NOTE(review): regress() in this file takes a spec argument and returns (xp, pred, leg); the regress() call below still needs updating
+            global widths
            coefs, r2, funcArr = regress(widths, metric)
            row = [mod] + comb + np.ndarray.tolist(coefs) + [r2]
            writer.writerow(row)
@ -206,6 +262,9 @@ def makeCoefTable():
    file.close()

 def genFuncs(fits='clsgn'):
+    ''' helper function for regress()
+        returns array of functions with one for each term desired in the regression fit
+    '''
    funcArr = []
    if 'c' in fits:
        funcArr += [lambda x: 1]
@ -220,78 +279,188 @@ def genFuncs(fits='clsgn'):
    return funcArr

 def noOutliers(freqs, delays, areas):
+    ''' returns a pared down list of freqs, delays, and areas 
+        cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area
+        helper function to freqPlot()
+    '''
    f=[]
    d=[]
    a=[]
-    try:
-        med = statistics.median(freqs)
-        for i in range(len(freqs)):
-            norm = freqs[i]/med
-            if (norm > 0.25) & (norm<1.75):
-                f += [freqs[i]]
-                d += [delays[i]]
-                a += [areas[i]]
-    except: pass
+    if not delays:
+        return f, d, a # empty timing category: nothing to filter, and min() below would raise
+    ind = delays.index(min(delays))
+    med = freqs[ind] # reference target: the one that produced the smallest delay
+    for i in range(len(freqs)):
+        norm = freqs[i]/med
+        if (norm > 0.25) & (norm<1.75):
+            f += [freqs[i]]
+            d += [delays[i]]
+            a += [areas[i]]
    
    return f, d, a

-def freqPlot(mod, width):
-    allSynths = getData(mod=mod, width=width)
-
-    freqsV, delaysV, areasV, freqsA, delaysA, areasA = ([] for i in range(6))
+def freqPlot(tech, mod, width):
+    ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width
+    '''
+    global allSynths
+    freqsL, delaysL, areasL = ([[], []] for i in range(3))
    for oneSynth in allSynths:
-        if (mod == oneSynth[0]) & (width == oneSynth[1]):
-            if (1000/oneSynth[3] < oneSynth[2]):
-                freqsV += [oneSynth[2]]
-                delaysV += [oneSynth[3]]
-                areasV += [oneSynth[4]]
-            else:
-                freqsA += [oneSynth[2]]
-                delaysA += [oneSynth[3]]
-                areasA += [oneSynth[4]]
+        if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
+            ind = (1000/oneSynth.delay < oneSynth.freq) # True (index 1) when the achieved frequency 1000/delay is below the target, i.e. timing violated; False (index 0) when timing is met
+            freqsL[ind] += [oneSynth.freq]
+            delaysL[ind] += [oneSynth.delay]
+            areasL[ind] += [oneSynth.area]

-    freqsV, delaysV, areasV = noOutliers(freqsV, delaysV, areasV)
-    freqsA, delaysA, areasA = noOutliers(freqsA, delaysA, areasA)
+    f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)

-    adprodA = np.multiply(areasA, delaysA)
-    adsqA = np.multiply(adprodA, delaysA)
-    adprodV = np.multiply(areasV, delaysV)
-    adsqV = np.multiply(adprodV, delaysV)
+    for ind in [0,1]:
+        areas = areasL[ind]
+        delays = delaysL[ind]
+        freqs = freqsL[ind]
+
+        if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
+        freqs, delays, areas = noOutliers(freqs, delays, areas) # comment out to see all syntheses
+
+        c = 'blue' if ind else 'green'
+        adprod = adprodpow(areas, delays, 1)
+        adpow = adprodpow(areas, delays, 2)
+        ax1.scatter(freqs, delays, color=c)
+        ax2.scatter(freqs, areas, color=c)
+        ax3.scatter(freqs, adprod, color=c)
+        ax4.scatter(freqs, adpow, color=c)

    legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'),
                       lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')]

-    f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
-    ax1.scatter(freqsA, delaysA, color='green')
-    ax1.scatter(freqsV, delaysV, color='blue')
-    ax2.scatter(freqsA, areasA, color='green')
-    ax2.scatter(freqsV, areasV, color='blue')
-    ax3.scatter(freqsA, adprodA, color='green')
-    ax3.scatter(freqsV, adprodV, color='blue')
-    ax4.scatter(freqsA, adsqA, color='green')
-    ax4.scatter(freqsV, adsqV, color='blue')
    ax1.legend(handles=legend_elements)
+    
    ax4.set_xlabel("Target Freq (MHz)")
    ax1.set_ylabel('Delay (ns)')
    ax2.set_ylabel('Area (sq microns)')
    ax3.set_ylabel('Area * Delay')
-    ax4.set_ylabel('Area * Delay^2')
+    ax4.set_ylabel('Area * $Delay^2$')
    ax1.set_title(mod + '_' + str(width))
    plt.show()

-def plotPPA(mod, freq=None):
-    fig, axs = plt.subplots(2, 2)
-    oneMetricPlot(mod, 'delay', ax=axs[0,0], fits='clg', freq=freq)
-    oneMetricPlot(mod, 'area', ax=axs[0,1], fits='s', freq=freq)
-    oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='c', freq=freq)
-    oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='s', freq=freq)
-    titleStr = "  (target  " + str(freq)+ "MHz)" if freq != None else "  min delay"
-    plt.suptitle(mod + titleStr)
+def squareAreaDelay(tech, mod, width):
+    ''' plots achieved delay (left axis) and area (right axis) against targeted delay
+        for syntheses with specified tech, module, width
+        also draws the achieved = targeted diagonal and, when some run met timing,
+        a horizontal line at the best delay among those runs
+    '''
+    global allSynths
+    freqsL, delaysL, areasL = ([[], []] for i in range(3))
+    for oneSynth in allSynths:
+        if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech):
+            ind = (1000/oneSynth.delay < oneSynth.freq) # index 1 when achieved frequency is below target (timing violated), else 0
+            freqsL[ind] += [oneSynth.freq]
+            delaysL[ind] += [oneSynth.delay]
+            areasL[ind] += [oneSynth.area]
+
+    f, (ax1) = plt.subplots(1, 1)
+    ax2 = ax1.twinx()
+
+    achievedDelays = [] # stays empty when no synthesis met timing
+    for ind in [0,1]:
+        areas = areasL[ind]
+        delays = delaysL[ind]
+        targets = freqsL[ind]
+        if not delays:
+            continue # nothing in this timing category to filter or plot
+        targets = [1000/f for f in targets] # target freq (MHz) -> target period (ns)
+
+        if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module
+        targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all 
+
+        if not ind:
+            achievedDelays = delays
+
+        c = 'blue' if ind else 'green'
+        ax1.scatter(targets, delays, marker='^', color=c)
+        ax2.scatter(targets, areas, marker='s', color=c)
+
+    legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'),
+                       lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'),
+                       lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing violated)'),
+                       lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')]
+
+    ax2.legend(handles=legend_elements, loc='upper left')
+
+    ax1.set_xlabel("Delay Targeted (ns)")
+    ax1.set_ylabel("Delay Achieved (ns)")
+    ax2.set_ylabel('Area (sq microns)')
+    ax1.set_title(mod + '_' + str(width))
+
+    squarify(f)
+
+    xvals = np.array(ax1.get_xlim())
+    frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0])
+    areaLowerLim = min(flatten(areasL))-100
+    areaUpperLim = max(flatten(areasL))/frac + areaLowerLim
+    ax2.set_ylim([areaLowerLim, areaUpperLim])
+    ax1.plot(xvals, xvals, ls="--", c=".3") # achieved = targeted reference line
+    if achievedDelays: # only mark the best achieved delay when some run met timing
+        ax1.hlines(y=min(achievedDelays), xmin=xvals[0], xmax=xvals[1], color="black", ls='--')
+
    plt.show()

-# writeCSV()
-# makeCoefTable()
+def squarify(fig):
+    ''' helper function for squareAreaDelay()
+        adjusts the subplot margins along the longer figure dimension
+        so that the plotting area comes out square
+    '''
+    figWidth, figHeight = fig.get_size_inches()
+    pars = fig.subplotpars
+    if figWidth > figHeight:
+        # wide figure: keep the vertical extent, centre a matching extent horizontally
+        side = figHeight*(pars.top-pars.bottom)
+        margin = (1.-side/figWidth)/2
+        fig.subplots_adjust(left=margin, right=1-margin)
+    else:
+        # tall or square figure: keep the horizontal extent, centre a matching extent vertically
+        side = figWidth*(pars.right-pars.left)
+        margin = (1.-side/figHeight)/2
+        fig.subplots_adjust(bottom=margin, top=1-margin)

-freqPlot('flopr', 128)
+def adprodpow(areas, delays, pow):
+    ''' helper function for freqPlot
+        returns a list with area*delay^pow for every area and the delay at the same index
+    '''
+    count = len(areas)

-# plotPPA('add')
+    return [areas[i]*(delays[i]**pow) for i in range(count)]
+
+def plotPPA(mod, freq=None, norm=True):
+    ''' draws a 2x2 figure for one module: delay, area, leakage power, and dynamic energy vs width, each with its fit
+        with no freq given, each width uses the synthesis with the best achievable delay
+        fits come from fitDict[mod]: entry 0 for delay, entry 1 for the other three metrics
+    '''
+    fig, axs = plt.subplots(2, 2)
+    chosenFits = fitDict[mod]
+    # (metric, subplot position, index into chosenFits), drawn in this order
+    panels = [('delay', (0,0), 0), ('area', (0,1), 1), ('lpower', (1,0), 1), ('denergy', (1,1), 1)]
+    for var, pos, fitInd in panels:
+        oneMetricPlot(mod, var, ax=axs[pos], fits=chosenFits[fitInd], freq=freq, norm=norm)
+    if freq != None:
+        titleStr = "  (target  " + str(freq)+ "MHz)"
+    else:
+        titleStr = " (best achievable delay)"
+    plt.suptitle(mod + titleStr)
+    plt.show()
+    
+if __name__ == '__main__':
+
+    # set up stuff, global variables
+    widths = [8, 16, 32, 64, 128]
+    # fitDict in progress
+    fitDict = {'add': ['cg', 'cl'], 'mult': ['clg', 's'], 'comparator': ['clsgn', 'clsgn'], 'csa': ['clsgn', 'clsgn'], 'shiftleft': ['clsgn', 'clsgn'], 'flop': ['cl', 'cl'], 'priorityencoder': ['clsgn', 'clsgn']}
+    TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
+    techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1.96, 1.98, 1], ['gf32', 'purple', 's', 15e-3, .351, .3116, 1], ['tsmc28', 'blue', '^', 12.2e-3, .252, 1.09, 1]]
+    techSpecs = [TechSpec(*t) for t in techSpecs]
+
+    # cleanup()
+    # synthsintocsv() # slow, run only when new synth runs to add to csv
+  
+    synthsfromcsv('ppaData.csv') # your csv here!
+
+    ### examples
+    # for mod in ['comparator', 'priorityencoder', 'shiftleft']:
+    #     for w in [16, 32]:
+    #         freqPlot('sky90', mod, w) # the weird ones
+    # squareAreaDelay('sky90', 'add', 32)
+    # oneMetricPlot('add', 'delay')
+    for mod in ['add', 'csa', 'mult', 'comparator', 'priorityencoder', 'shiftleft', 'flop']:
+        plotPPA(mod, norm=False) # no norm input now defaults to normalized
--- a/synthDC/ppaData.csv
+++ b/synthDC/ppaData.csv
--- a/synthDC/ppaSynth.py
+++ b/synthDC/ppaSynth.py
@ -42,21 +42,22 @@ def getData():
    return allSynths

 allSynths = getData()
-arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 14, 20, 40]
+arr = [-40, -20, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20, 40]

-widths = [32, 64, 128]
-modules = ['flopr']
-tech = 'sky90'
+widths = [16, 8, 32, 64, 128]
+modules = ['add']
+tech = 'tsmc28'
 LoT = []

 # # # initial sweep to get estimate of min delay
-# freqs = ['7500']
+# freqs = [25000, 35000]
 # for module in modules:
 #     for width in widths:
 #         for freq in freqs:
 #             LoT += [[module, width, tech, freq]]

-# thorough sweep based on estimate of min delay
+
+# # thorough sweep based on estimate of min delay
 for m in modules:
    for w in widths:
        delays = []
@ -69,7 +70,6 @@ for m in modules:
            LoT += [[m, w, tech, freq]]

 deleteRedundant(LoT)
-
 pool = Pool()
 pool.starmap(runCommand, LoT)
 pool.close()
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@ -117,6 +117,10 @@ if {$tech == "sky130"} {
    } else {
 	set_driving_cell  -lib_cell scc9gena_dfxbp_1 -pin Q $all_in_ex_clk
    }
+} elseif {$tech == "tsmc28"} {
+    # use the inverter cell as the driving cell when $drive is "INV" (Tcl conditions must be braced, not parenthesised)
+    if {$drive == "INV"} {
+	set_driving_cell -lib_cell INVD1BWP30P140 -pin ZN $all_in_ex_clk
+    }
 }

 # Set input/output delay
@ -132,6 +136,10 @@ if {$tech == "sky130"} {
    } else {
        set_load [expr [load_of scc9gena_tt_1.2v_25C/scc9gena_dfxbp_1/D] * 1] [all_outputs]
    }
+} elseif {$tech == "tsmc28"} {
+    # load every output with the inverter input pin when $drive is "INV" (Tcl conditions must be braced, not parenthesised)
+    if {$drive == "INV"} {
+	set_load [expr [load_of tcbn28hpcplusbwp30p140tt0p9v25c/INVD4BWP30P140/I] * 1] [all_outputs]
+    }
 }

 # Set the wire load model 
--- a/tests/fp/create_vectors.sh
+++ b/tests/fp/create_vectors.sh
@ -98,101 +98,101 @@ $BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
 $BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
 $BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
 echo "Creating f16_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
 echo "Creating f32_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
 echo "Creating f64_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
 echo "Creating f128_to_ui32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
 echo "Creating f16_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
 echo "Creating f32_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
 echo "Creating f64_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
 echo "Creating f128_to_ui64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
 echo "Creating f16_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
 echo "Creating f32_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
 echo "Creating f64_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
 echo "Creating f128_to_i32 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
+$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
+$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
 echo "Creating f16_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
 echo "Creating f32_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
 echo "Creating f64_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
 echo "Creating f128_to_i64 convert vectors"
-$BUILD/testfloat_gen -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
-$BUILD/testfloat_gen -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
-$BUILD/testfloat_gen -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
-$BUILD/testfloat_gen -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
-$BUILD/testfloat_gen -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
+$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
+$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
+$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
+$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
 echo "Creating f16_to_f32 convert vectors"
 $BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
 $BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv