diff --git a/README.md b/README.md
index 39b5c6780..7e0947412 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,49 @@
+# divremsqrt
+This branch contains the relevant hardware and test/synthesis flows for cvw's unified integer/fp divide/sqrt recurrence unit. The recurrence unit can be generated for a variety of configurations, which span flavors of radix = {2,4}, floating-point precision = {float,double,quad}, integer width = {unsupported,32,64} and divider copies = {1,2,4,8}.
+
+The fpu postprocessor on cvw handles inputs not only from the div/sqrt unit, but also the fma and convert units. This branch's drsu unit contains a postprocessor with logic only relevant to division/sqrt.
+
+# file hierarchy
+
+The RTL files for the divider can be found under `cvw/src/fpu`
+
+The majority of divider modules are found in `cvw/src/fpu/divremsqrt`, which also borrows some modules from `cvw/src/fpu/fdivsqrt`
+
+divremsqrt/drsu describes the top-level unit for the divider, taking in unpacked floating point signals, including Xs, Xm, Xe, Ys, Ym, Ye.
+
+drsu first feeds signals to `divremsqrt/divremsqrt`, which contains the preprocessor, iteration units, fsm, and postprocessing logic. The postprocessor in `divremsqrt/divremsqrt` also contains all integer postprocessing logic. Outputs from `divremsqrt/divremsqrt` are then sent to `divremsqrt/divremsqrtpostprocess`, which handles rounding and flags.
+
+# verification flow
+
+drsu is verified with the risc-v arch test Berkeley SoftFloat floating point suite of test vectors for floating point square-root and division. In order to run the top-level regression script, run `regression-wally-intdiv -intdiv`
+
+The top-level regression python script is found accordingly in `cvw/bin/regression-wally-intdiv`. The testbench is found in `cvw/testbench/testbench_fp`, which runs drsu against testvectors. Batches of testvectors are stored within `cvw/testbench/tests-fp.vh`, and the raw binary test vectors are read from `tests/fp/vectors`
+
+Regression log files can be found in `cvw/sim/questa/logs` after running `regression-wally-intdiv -intdiv`. Files are named with `{precision}_ieee_div_{R}_{K}_{integer}_rv{XLEN}gc_{TESTNAME}.log`
+
+* precision denotes the floating-point precision types supported by the divider: f, fd, fdq, fdqh
+* R denotes the radix of the divider: 2,4
+* K denotes the number of divider copies in the unit: 1,2,4,8
+* integer denotes whether integer division/remainder is supported on the divider: i
+* XLEN denotes the width of integers: 32, 64 (this only matters if integer is supported on the divider)
+* TESTNAME denotes which tests are being run:
+    * fdivremsqrt: runs fdiv, fsqrt, intdiv, intrem
+    * fdiv: runs fdiv
+    * fsqrt: runs fsqrt
+   
+
+ 
+# synthesis flow
+To run synthesis results for all flavors of the recurrence unit, go to `cvw/synthDC/scripts` and run `python3 synthdrsu.py`. This will execute a python script that runs the installed version of Synopsys Design Compiler on divider permutations for target frequencies of 5GHz and 100MHz. To then pipe area, delay and energy results to a CSV, run `./writeCSV.sh`. Results can then be viewed in `fp-synthresults_reordered.csv` in a format similar to the one presented in the paper.
+# start-up steps
+1) `git clone --recurse-submodules https://github.com/openhwgroup/cvw.git`
+2) `cd cvw`
+3) `git checkout divremsqrt`
+4) `source ./setup.sh`
+5) `make`
+6) `regression-wally-intdiv -intdiv`
+
+
 # core-v-wally
 
 Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, B, C, D, F, M, Q, and Zk* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO).   Wally is written in SystemVerilog.  It passes the [RISC-V Arch Tests](https://github.com/riscv-non-isa/riscv-arch-test) and boots Linux on an FPGA.  Configurations range from a minimal RV32E core to a fully featured RV64GC application processor.
diff --git a/bin/regression-wally b/bin/regression-wally
index cd4844b08..512856602 100755
--- a/bin/regression-wally
+++ b/bin/regression-wally
@@ -371,6 +371,7 @@ args = parser.parse_args()
 if (args.nightly):
     nightMode = "--nightly";
     sims = ["questa", "verilator", "vcs"] # exercise all simulators; can omit a sim if no license is available
+#    sims = ["questa", "verilator"] # exercise all simulators; can omit a sim if no license is available
 else:
     nightMode = ""
     sims = [defaultsim]
@@ -512,10 +513,12 @@ def main():
     elif args.fcov:
         TIMEOUT_DUR = 1*60
         os.system('rm -f questa/fcov_ucdb/* questa/fcov_logs/* questa/fcov/*')
-    elif args.nightly:
+    elif args.buildroot:
         TIMEOUT_DUR = 60*1440 # 1 day
     elif args.testfloat:
         TIMEOUT_DUR = 30*60 # seconds
+    elif args.nightly:
+        TIMEOUT_DUR = 30*60 # seconds
     else:
         TIMEOUT_DUR = 10*60 # seconds
 
diff --git a/bin/regression-wally-intdiv b/bin/regression-wally-intdiv
new file mode 100755
index 000000000..5408e8661
--- /dev/null
+++ b/bin/regression-wally-intdiv
@@ -0,0 +1,577 @@
+#!/usr/bin/python3
+##################################
+#
+# regression-wally-intdiv
+# David_Harris@Hmc.edu 25 January 2021
+# Modified by Jarred Allen <jaallen@g.hmc.edu>
+#
+# Run a regression with multiple configurations in parallel and exit with
+# non-zero status code if an error happened, as well as printing human-readable
+# output.
+#
+##################################
+import sys,os,shutil
+import multiprocessing
+
+
+# Terminal escape codes used to colorize the status messages printed by this script
+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKCYAN = '\033[96m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+from collections import namedtuple
+
+WALLY = os.environ.get('WALLY')
+regressionDir = WALLY + '/sim'
+os.chdir(regressionDir)
+
+coverage = '-coverage' in sys.argv
+fp = '-fp' in sys.argv
+nightly = '-nightly' in sys.argv
+softfloat = '-softfloat' in sys.argv
+intdiv = '-intdiv' in sys.argv
+
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
+# name:     the name of this test configuration (used in printing human-readable
+#           output and picking logfile names)
+# variant:  the configuration the test runs on (also used in the logfile name)
+# cmd:      the command to run to test (may include the logfile as '{}'; the
+#           command needs to write to that file)
+# grepstr:  the pattern to grep the log file for. The test succeeds iff grep
+#           finds it (any pattern grep accepts, see `man 1 grep` for more info).
+
+# edit this list to add more test cases
+if (nightly):
+    nightMode = "-nightly";
+    configs = []
+else:
+    nightMode = "";
+    configs = [
+        TestCase(
+            name="lints",
+            variant="all",
+            cmd="./lint-wally " + nightMode + " | tee {}",
+            grepstr="lints run with no errors or warnings"
+        )
+    ]
+
+def getBuildrootTC(boot):
+    """Return the buildroot TestCase: boot=True greps for the login prompt, otherwise for INSTR_LIMIT instructions"""
+    INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
+    if boot:
+        name="buildrootboot"
+        BRcmd="vsim > {} -c <<!\ndo wally.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
+        BRgrepstr="WallyHostname login:"
+    else:
+        name="buildroot"
+        if (coverage):
+            print( "buildroot coverage")
+            BRcmd="vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0 -coverage\n!"
+        else:
+            print( "buildroot no coverage")
+            BRcmd="vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot configOptions -GINSTR_LIMIT=" +str(INSTR_LIMIT) + " \n!"
+        BRgrepstr=str(INSTR_LIMIT)+" instructions"
+    return  TestCase(name,variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
+
+tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
+
+tests64i = ["arch64i"] 
+for test in tests64i:
+  tc = TestCase(
+        name=test,
+        variant="rv64i",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv64i "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
+tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32f_divsqrt", "arch32d_divsqrt", 
+             "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32a", "arch32zifencei", "arch32zicond", 
+             "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zfh", "arch32zfh_fma", 
+             "arch32zfh_divsqrt", "arch32zfaf", "wally32a", "wally32priv", "wally32periph", 
+             "arch32zbkb", "arch32zbkc", "arch32zbkx", "arch32zknd", "arch32zkne", "arch32zknh"]  # "arch32zbc", "arch32zfad",
+#tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32a", "arch32zifencei", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zicboz", "arch32zcb", "wally32a",  "wally32priv", "wally32periph"]  
+for test in tests32gc:
+  tc = TestCase(
+        name=test,
+        variant="rv32gc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+tests32imcimperas = ["imperas32i", "imperas32c"] # unused
+tests32imc = ["arch32i", "arch32c", "arch32m", "wally32periph"] 
+for test in tests32imc:
+  tc = TestCase(
+        name=test,
+        variant="rv32imc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32imc "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+tests32i = ["arch32i"] 
+for test in tests32i:
+  tc = TestCase(
+        name=test,
+        variant="rv32i",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32i "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+
+tests32e = ["arch32e"] 
+for test in tests32e:
+  tc = TestCase(
+        name=test,
+        variant="rv32e",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32e "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64f_divsqrt", "arch64d_divsqrt", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",  "arch64zfh", "arch64zfh_divsqrt", "arch64zfh_fma", "arch64zfaf", "arch64zfad", "arch64zbkb", "arch64zbkc", "arch64zbkx", "arch64zknd", "arch64zkne", "arch64zknh",
+             "arch64priv", "arch64c",  "arch64m", "arch64a", "arch64zifencei", "arch64zicond", "wally64a", "wally64periph", "wally64priv"] # add arch64zfh_fma when available; arch64zicobz, arch64zcb when working
+#tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", 
+#             "arch64priv", "arch64c",  "arch64m", "arch64a", "arch64zifencei", "wally64a", "wally64periph", "wally64priv", "arch64zicboz", "arch64zcb"] 
+if (coverage):  # delete all but 64gc tests when running coverage
+    configs = []
+    tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c",  "arch64m",
+                 "arch64zifencei", "arch64zicond", "arch64a", "wally64a", "wally64periph", "wally64priv", 
+                 "arch64zba",  "arch64zbb",  "arch64zbc", "arch64zbs"] # add when working: "arch64zcb", "arch64zicboz"
+    if (fp):
+       tests64gc.append("arch64f")
+       tests64gc.append("arch64d")
+       tests64gc.append("arch64zfh")
+       tests64gc.append("arch64f_fma")
+       tests64gc.append("arch64d_fma") 
+       tests64gc.append("arch64zfh_fma")
+       tests64gc.append("arch64f_divsqrt")
+       tests64gc.append("arch64d_divsqrt")
+       tests64gc.append("arch64zfh_divsqrt")
+       tests64gc.append("arch64zfaf")
+       tests64gc.append("arch64zfad")
+    coverStr = '-coverage'
+else:
+   coverStr = ''
+for test in tests64gc:
+  tc = TestCase(
+        name=test,
+        variant="rv64gc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
+
+# run derivative configurations if requested  
+if (nightly):
+    derivconfigtests = [
+        ["tlb2_rv32gc", ["wally32priv"]],
+        ["tlb16_rv32gc", ["wally32priv"]],
+        ["tlb2_rv64gc", ["wally64priv"]],
+        ["tlb16_rv64gc", ["wally64priv"]],
+        ["way_1_4096_512_rv32gc", ["arch32i"]],
+        ["way_2_4096_512_rv32gc", ["arch32i"]],
+        ["way_8_4096_512_rv32gc", ["arch32i"]],
+        ["way_4_2048_512_rv32gc", ["arch32i"]],
+        ["way_4_4096_256_rv32gc", ["arch32i"]],
+        ["way_1_4096_512_rv64gc", ["arch64i"]],
+        ["way_2_4096_512_rv64gc", ["arch64i"]],
+        ["way_8_4096_512_rv64gc", ["arch64i"]],
+        ["way_4_2048_512_rv64gc", ["arch64i"]],
+        ["way_4_4096_256_rv64gc", ["arch64i"]],
+        ["way_4_4096_1024_rv64gc", ["arch64i"]],
+
+        ["ram_0_0_rv64gc", ["ahb64"]],
+        ["ram_1_0_rv64gc", ["ahb64"]],
+        ["ram_1_1_rv64gc", ["ahb64"]],
+        ["ram_2_0_rv64gc", ["ahb64"]],
+        ["ram_2_1_rv64gc", ["ahb64"]],
+        
+        ["noicache_rv32gc", ["ahb32"]],
+# cacheless designs will not work until DTIM supports FLEN > XLEN
+#        ["nodcache_rv32gc", ["ahb32"]],
+#        ["nocache_rv32gc", ["ahb32"]],
+        ["noicache_rv64gc", ["ahb64"]],
+        ["nodcache_rv64gc", ["ahb64"]],
+        ["nocache_rv64gc", ["ahb64"]],
+
+        ### add misaligned tests
+
+        ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+
+        ### branch predictor simulation
+
+        # ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # # btb
+        # ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # # ras
+        # ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+#  enable floating-point tests when lint is fixed
+        ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
+        ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
+        ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
+        ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
+        ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
+        ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
+        ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed
+        ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
+        ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
+        ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
+        
+
+    ]
+    for test in derivconfigtests:
+        config = test[0];
+        tests = test[1];
+        if(len(test) >= 4 and test[2] == "configOptions"):
+            configOptions = test[3]
+            cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
+        else:
+            configOptions = ""
+            cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
+        for t in tests:
+            tc = TestCase(
+                    name=t,
+                    variant=config,
+                    cmd=cmdPrefix+" "+t+" configOptions "+configOptions+"\n!",
+                    grepstr="All tests ran without failures")
+            configs.append(tc)
+
+
+
+
+# softfloat tests
+if (softfloat):
+    testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
+    configs = []
+    softfloatconfigs = [
+    "fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
+    "fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
+    "fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
+    "fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
+    "fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
+    "fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
+    "fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
+    "fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
+    "fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
+    "fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
+    "fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
+    "fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
+    "fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
+    "fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
+    "fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
+    "fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
+    "fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
+    "fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
+    "fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
+    "fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
+    "f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
+    "f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
+    "f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
+    "f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
+    ]
+    for config in softfloatconfigs:
+        # div test case
+        divtest = TestCase(
+            name="div",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " div \n!",
+            grepstr="All Tests completed with          0 errors"
+        )
+        configs.insert(0,divtest)
+
+        # sqrt test case
+        sqrttest = TestCase(
+            name="sqrt",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " sqrt \n!",
+            grepstr="All Tests completed with          0 errors"
+        )
+        #configs.append(sqrttest)
+        configs.insert(0,sqrttest)
+
+
+        # skip if divider variant config
+        if ("ieee" in config):
+            # cvtint test case
+            cvtinttest = TestCase(
+                name="cvtint",
+                variant=config,
+                cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " cvtint \n!",
+                grepstr="All Tests completed with          0 errors"
+                )
+            configs.append(cvtinttest)
+
+            # cvtfp test case
+            # WILL fail on F_only (refer to spec)
+            cvtfptest = TestCase(
+                name="cvtfp",
+                variant=config,
+                cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " cvtfp \n!",
+                grepstr="All Tests completed with          0 errors"
+            )
+            configs.append(cvtfptest)    
+      
+# intdiv verification
+if (intdiv):
+    configs = []
+    testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
+    # ***NOTE add to this
+    
+    intdivconfigs = [
+    "fdh_ieee_div_2_1i_rv32gc", "fdh_ieee_div_2_1i_rv64gc", "fdh_ieee_div_2_2i_rv32gc",
+    "fdh_ieee_div_2_2i_rv64gc", "fdh_ieee_div_2_4i_rv32gc", "fdh_ieee_div_2_4i_rv64gc",
+    "fdh_ieee_div_4_1i_rv32gc", "fdh_ieee_div_4_1i_rv64gc", "fdh_ieee_div_4_2i_rv32gc",
+    "fdh_ieee_div_4_2i_rv64gc", "fdh_ieee_div_4_4i_rv32gc", "fdh_ieee_div_4_4i_rv64gc",
+    "fd_ieee_div_2_1i_rv32gc", "fd_ieee_div_2_1i_rv64gc", "fd_ieee_div_2_2i_rv32gc",
+    "fd_ieee_div_2_2i_rv64gc", "fd_ieee_div_2_4i_rv32gc", "fd_ieee_div_2_4i_rv64gc",
+    "fd_ieee_div_4_1i_rv32gc", "fd_ieee_div_4_1i_rv64gc", "fd_ieee_div_4_2i_rv32gc",
+    "fd_ieee_div_4_2i_rv64gc", "fd_ieee_div_4_4i_rv32gc", "fd_ieee_div_4_4i_rv64gc",
+    "fdqh_ieee_div_2_1i_rv32gc", "fdqh_ieee_div_2_1i_rv64gc", "fdqh_ieee_div_2_2i_rv32gc",
+    "fdqh_ieee_div_2_2i_rv64gc", "fdqh_ieee_div_2_4i_rv32gc", "fdqh_ieee_div_2_4i_rv64gc",
+    "fdqh_ieee_div_4_1i_rv32gc", "fdqh_ieee_div_4_1i_rv64gc", "fdqh_ieee_div_4_2i_rv32gc",
+    "fdqh_ieee_div_4_2i_rv64gc", "fdqh_ieee_div_4_4i_rv32gc", "fdqh_ieee_div_4_4i_rv64gc",
+    "fdq_ieee_div_2_1i_rv32gc", "fdq_ieee_div_2_1i_rv64gc", "fdq_ieee_div_2_2i_rv32gc",
+    "fdq_ieee_div_2_2i_rv64gc", "fdq_ieee_div_2_4i_rv32gc", "fdq_ieee_div_2_4i_rv64gc",
+    "fdq_ieee_div_4_1i_rv32gc", "fdq_ieee_div_4_1i_rv64gc", "fdq_ieee_div_4_2i_rv32gc",
+    "fdq_ieee_div_4_2i_rv64gc", "fdq_ieee_div_4_4i_rv32gc", "fdq_ieee_div_4_4i_rv64gc",
+    "fh_ieee_div_2_1i_rv32gc", "fh_ieee_div_2_1i_rv64gc", "fh_ieee_div_2_2i_rv32gc",
+    "fh_ieee_div_2_2i_rv64gc", "fh_ieee_div_2_4i_rv32gc", "fh_ieee_div_2_4i_rv64gc",
+    "fh_ieee_div_4_1i_rv32gc", "fh_ieee_div_4_1i_rv64gc", "fh_ieee_div_4_2i_rv32gc",
+    "fh_ieee_div_4_2i_rv64gc", "fh_ieee_div_4_4i_rv32gc", "fh_ieee_div_4_4i_rv64gc",
+    "f_ieee_div_2_1i_rv32gc", "f_ieee_div_2_1i_rv64gc", "f_ieee_div_2_2i_rv32gc",
+    "f_ieee_div_2_2i_rv64gc", "f_ieee_div_2_4i_rv32gc", "f_ieee_div_2_4i_rv64gc",
+    "f_ieee_div_4_1i_rv32gc", "f_ieee_div_4_1i_rv64gc", "f_ieee_div_4_2i_rv32gc",
+    "f_ieee_div_4_2i_rv64gc", "f_ieee_div_4_4i_rv32gc", "f_ieee_div_4_4i_rv64gc",
+    "fd_ieee_div_2_8i_rv32gc",
+    "fd_ieee_div_2_8i_rv64gc",
+    "fdq_ieee_div_2_8i_rv64gc",
+    "fdq_ieee_div_2_8i_rv32gc",
+    "f_ieee_div_2_8i_rv64gc",
+    "f_ieee_div_2_8i_rv32gc"
+    ]
+    nointdivconfigs = [
+    "fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
+    "fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
+    "fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
+    "fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
+    "fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
+    "fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
+    "fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
+    "fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
+    "fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
+    "fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
+    "fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
+    "fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
+    "fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
+    "fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
+    "fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
+    "fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
+    "fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
+    "fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
+    "fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
+    "fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
+    "f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
+    "f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
+    "f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
+    "f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
+    ]
+    for config in intdivconfigs:
+        # fdivremsqrt test case
+        name = "div_drsu"
+        logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log" 
+        fdivremsqrttestcase = TestCase(
+            name=name,
+            variant=config,
+            cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
+            grepstr="All Tests completed with          0 errors"
+        )
+        configs.insert(0,fdivremsqrttestcase)
+    for config in nointdivconfigs:
+        # div,sqrt test cases for no integer flavor of divider
+
+        name = "div_drsu"
+        logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log" 
+        divtestcase = TestCase(
+            name=name,
+            variant=config,
+            #cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " div_drsu \n!",
+            cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
+            grepstr="All Tests completed with          0 errors"
+        )
+        configs.insert(0,divtestcase)
+
+        name = "sqrt_drsu"
+        logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log" 
+        sqrttestcase = TestCase(
+            name=name,
+            variant=config,
+            #cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " sqrt_drsu \n!",
+            cmd="wsim --tb testbench_fp  " + " " + config + " " + name + " > " + logname,
+            grepstr="All Tests completed with          0 errors"
+        )
+        configs.insert(0,sqrttestcase)
+
+import os
+from multiprocessing import Pool, TimeoutError
+
+def search_log_for_text(text, logfile):
+    """Return True when grep finds the pattern `text` in `logfile`, and False otherwise"""
+    status = os.system("grep -e '%s' '%s' > /dev/null" % (text, logfile))
+    return status == 0
+
+def run_test_case(config):
+    """Run the given test case, and return 0 if the test succeeds and 1 if it fails"""
+    testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
+    # The log path is $WALLY/sim/<simulator>/logs/<variant>_<name>.log
+    logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config.variant+"_"+config.name+".log"
+    # A '{}' placeholder in the command is replaced by the log path; commands without one are unchanged
+    cmd = config.cmd.format(logname)
+    # Execute the command from the regression directory ($WALLY/sim)
+    os.chdir(regressionDir)
+    os.system(cmd)
+    if search_log_for_text(config.grepstr, logname):
+        print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name))
+        return 0
+    else:
+        print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name))
+        print("  Check %s" % logname)
+        return 1
+
+def main():
+    """Run the tests and count the failures"""
+    global configs, coverage
+    try:
+        os.chdir(regressionDir)
+        os.mkdir("logs")
+    except OSError:
+        pass
+    try:
+        shutil.rmtree("wkdir")
+    except OSError:
+        pass
+    finally:
+        os.mkdir("wkdir")
+ 
+    if '-makeTests' in sys.argv:
+        os.chdir(regressionDir)
+        os.system('./make-tests.sh | tee ./logs/make-tests.log')
+
+    if '-all' in sys.argv:
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs.append(getBuildrootTC(boot=True))
+    elif '-buildroot' in sys.argv:
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs=[getBuildrootTC(boot=True)]
+    elif '-coverage' in sys.argv:
+        TIMEOUT_DUR = 20*60 # seconds    
+    #   Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage.
+    #   Also it is slow to run.   
+    #    configs.append(getBuildrootTC(boot=False))
+        os.system('rm -f cov/*.ucdb')
+    elif '-nightly' in sys.argv:
+        TIMEOUT_DUR = 60*1440 # 1 day
+        configs.append(getBuildrootTC(boot=False))
+    elif '-softfloat' in sys.argv:
+        TIMEOUT_DUR = 60*60 # seconds
+    elif '-intdiv' in sys.argv:
+        TIMEOUT_DUR = 60*60 # seconds
+    else:
+        TIMEOUT_DUR = 10*60 # seconds
+        configs.append(getBuildrootTC(boot=False))
+
+    # Scale the number of concurrent processes to the number of test cases, but
+    # max out at a limited number of concurrent processes to not overwhelm the system
+    with Pool(processes=min(len(configs),multiprocessing.cpu_count())) as pool:
+       num_fail = 0
+       results = {}
+       for config in configs:
+           results[config] = pool.apply_async(run_test_case,(config,))
+       for (config,result) in results.items():
+           try:
+             num_fail+=result.get(timeout=TIMEOUT_DUR)
+           except TimeoutError:
+             num_fail+=1
+             print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR))
+
+    # Coverage report
+    if coverage:
+       os.system('make coverage')
+    # Count the number of failures
+    if num_fail:
+        print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail)
+    else:
+        print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
+    return num_fail
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/bin/wsim b/bin/wsim
index 986957a3c..cef7eca27 100755
--- a/bin/wsim
+++ b/bin/wsim
@@ -28,6 +28,7 @@ parser.add_argument("--tb", "-t", help="Testbench", choices=["testbench", "testb
 parser.add_argument("--gui", "-g", help="Simulate with GUI", action="store_true")
 parser.add_argument("--ccov", "-c", help="Code Coverage", action="store_true")
 parser.add_argument("--fcov", "-f", help="Functional Coverage, implies lockstep", action="store_true")
+parser.add_argument("--fcov2", "-f2", help="Functional Coverage, implies lockstep", action="store_true")
 parser.add_argument("--fcovrvvi", "-fr", help="Functional Coverage RVVI", action="store_true")
 parser.add_argument("--args", "-a", help="Optional arguments passed to simulator via $value$plusargs", default="")
 parser.add_argument("--vcd", "-v", help="Generate testbench.vcd", action="store_true")
@@ -66,7 +67,7 @@ if(args.testsuite.endswith('.elf') and args.elf == ""): # No --elf argument; che
 
 
 # Validate arguments
-if (args.gui or args.ccov or args.fcov or args.fcovrvvi or args.lockstep):
+if (args.gui or args.ccov or args.fcov or args.fcov2 or args.fcovrvvi or args.lockstep):
     if args.sim not in ["questa", "vcs"]:
         print("Option only supported for Questa and VCS")
         exit(1)
@@ -81,7 +82,7 @@ if (args.rvvi):
 if(int(args.locksteplog) >= 1): EnableLog = 1
 else: EnableLog = 0
 if (args.lockstep):
-    prefix = "IMPERAS_TOOLS=" + WALLY + "/sim/imperas.ic"
+    prefix = "IMPERAS_TOOLS=" + WALLY + "/config/"+args.config+"/imperas.ic"
     if(args.locksteplog != 0): ImperasPlusArgs = " +IDV_TRACE2LOG=" + str(EnableLog) + " +IDV_TRACE2LOG_AFTER=" + str(args.locksteplog) 
     else: ImperasPlusArgs = ""
     if(args.fcov):
@@ -90,6 +91,12 @@ if (args.lockstep):
         else: EnableLog = 0
         ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr;
         suffix = ""
+    elif(args.fcov2):  # elif: the trailing else (plain lockstep) must not also run when --fcov is set
+        CovEnableStr = "1" if int(args.covlog) > 0  else "0";
+        if(int(args.covlog) >= 1): EnableLog = 1  # int() as on the line above; covlog is not guaranteed to be an int here
+        else: EnableLog = 0
+        ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr;
+        suffix = ""
     else:
         CovEnableStr = ""
         suffix = "--lockstep"
@@ -104,6 +111,8 @@ if (args.ccov):
     flags += " --ccov"
 if (args.fcov):
     flags += " --fcov"
+if (args.fcov2):
+    flags += " --fcov2"
 if (args.fcovrvvi):
     flags += "--fcovrvvi"
 
diff --git a/config/derivlist.txt b/config/derivlist.txt
index ab9ee703f..cac0c8c02 100644
--- a/config/derivlist.txt
+++ b/config/derivlist.txt
@@ -950,6 +950,9 @@ D_SUPPORTED     0
 ZCD_SUPPORTED   0
 ZFH_SUPPORTED   0
 
+deriv f_div_2_8_rv64gc    f_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv f_div_4_1_rv64gc    div_4_1_rv64gc    
 D_SUPPORTED     0
 ZCD_SUPPORTED   0
@@ -982,6 +985,9 @@ D_SUPPORTED     0
 ZCD_SUPPORTED   0
 ZFH_SUPPORTED   1
 
+deriv fh_div_2_8_rv32gc    fh_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fh_div_4_1_rv32gc    div_4_1_rv32gc    
 D_SUPPORTED     0
 ZCD_SUPPORTED   0
@@ -1012,6 +1018,9 @@ D_SUPPORTED     0
 ZCD_SUPPORTED   0
 ZFH_SUPPORTED   1
 
+deriv fh_div_2_8_rv64gc    fh_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fh_div_4_1_rv64gc    div_4_1_rv64gc    
 D_SUPPORTED     0
 ZCD_SUPPORTED   0
@@ -1038,6 +1047,9 @@ ZFH_SUPPORTED   0
 deriv fd_div_2_4_rv32gc    div_2_4_rv32gc
 ZFH_SUPPORTED   0
 
+deriv fd_div_2_8_rv32gc    fd_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fd_div_4_1_rv32gc    div_4_1_rv32gc    
 ZFH_SUPPORTED   0
 
@@ -1056,6 +1068,9 @@ ZFH_SUPPORTED   0
 deriv fd_div_2_4_rv64gc    div_2_4_rv64gc    
 ZFH_SUPPORTED   0
 
+deriv fd_div_2_8_rv64gc    fd_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fd_div_4_1_rv64gc    div_4_1_rv64gc    
 ZFH_SUPPORTED   0
 
@@ -1077,6 +1092,9 @@ ZFH_SUPPORTED   1
 deriv fdh_div_2_4_rv32gc    div_2_4_rv32gc
 ZFH_SUPPORTED   1
 
+deriv fdh_div_2_8_rv32gc    fdh_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdh_div_4_1_rv32gc    div_4_1_rv32gc    
 ZFH_SUPPORTED   1
 
@@ -1095,6 +1113,9 @@ ZFH_SUPPORTED   1
 deriv fdh_div_2_4_rv64gc    div_2_4_rv64gc    
 ZFH_SUPPORTED   1
 
+deriv fdh_div_2_8_rv64gc    fdh_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdh_div_4_1_rv64gc    div_4_1_rv64gc    
 ZFH_SUPPORTED   1
 
@@ -1118,6 +1139,9 @@ deriv fdq_div_2_4_rv32gc    div_2_4_rv32gc
 Q_SUPPORTED     1
 ZFH_SUPPORTED   0
 
+deriv fdq_div_2_8_rv32gc    fdq_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdq_div_4_1_rv32gc    div_4_1_rv32gc    
 Q_SUPPORTED     1
 ZFH_SUPPORTED   0
@@ -1142,6 +1166,9 @@ deriv fdq_div_2_4_rv64gc    div_2_4_rv64gc
 Q_SUPPORTED     1
 ZFH_SUPPORTED   0
 
+deriv fdq_div_2_8_rv64gc    fdq_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdq_div_4_1_rv64gc    div_4_1_rv64gc    
 Q_SUPPORTED     1
 ZFH_SUPPORTED   0
@@ -1168,6 +1195,9 @@ deriv fdqh_div_2_4_rv32gc    div_2_4_rv32gc
 Q_SUPPORTED     1
 ZFH_SUPPORTED   1
 
+deriv fdqh_div_2_8_rv32gc    fdqh_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdqh_div_4_1_rv32gc    div_4_1_rv32gc    
 Q_SUPPORTED     1
 ZFH_SUPPORTED   1
@@ -1192,6 +1222,9 @@ deriv fdqh_div_2_4_rv64gc    div_2_4_rv64gc
 Q_SUPPORTED     1
 ZFH_SUPPORTED   1
 
+deriv fdqh_div_2_8_rv64gc    fdqh_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdqh_div_4_1_rv64gc    div_4_1_rv64gc    
 Q_SUPPORTED     1
 ZFH_SUPPORTED   1
@@ -1215,6 +1248,9 @@ IEEE754         1
 deriv f_ieee_div_2_4_rv32gc    f_div_2_4_rv32gc    
 IEEE754         1
 
+deriv f_ieee_div_2_8_rv32gc    f_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv f_ieee_div_4_1_rv32gc    f_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1233,6 +1269,9 @@ IEEE754         1
 deriv f_ieee_div_2_4_rv64gc    f_div_2_4_rv64gc    
 IEEE754         1
 
+deriv f_ieee_div_2_8_rv64gc    f_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv f_ieee_div_4_1_rv64gc    f_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1252,6 +1291,9 @@ IEEE754         1
 deriv fh_ieee_div_2_4_rv32gc    fh_div_2_4_rv32gc    
 IEEE754         1
 
+deriv fh_ieee_div_2_8_rv32gc    fh_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fh_ieee_div_4_1_rv32gc    fh_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1270,6 +1312,9 @@ IEEE754         1
 deriv fh_ieee_div_2_4_rv64gc    fh_div_2_4_rv64gc    
 IEEE754         1
 
+deriv fh_ieee_div_2_8_rv64gc    fh_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fh_ieee_div_4_1_rv64gc    fh_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1289,6 +1334,9 @@ IEEE754         1
 deriv fd_ieee_div_2_4_rv32gc    fd_div_2_4_rv32gc    
 IEEE754         1
 
+deriv fd_ieee_div_2_8_rv32gc    fd_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fd_ieee_div_4_1_rv32gc    fd_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1307,6 +1355,9 @@ IEEE754         1
 deriv fd_ieee_div_2_4_rv64gc    fd_div_2_4_rv64gc    
 IEEE754         1
 
+deriv fd_ieee_div_2_8_rv64gc    fd_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fd_ieee_div_4_1_rv64gc    fd_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1327,6 +1378,9 @@ IEEE754         1
 deriv fdh_ieee_div_2_4_rv32gc    fdh_div_2_4_rv32gc    
 IEEE754         1
 
+deriv fdh_ieee_div_2_8_rv32gc    fdh_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdh_ieee_div_4_1_rv32gc    fdh_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1345,6 +1399,9 @@ IEEE754         1
 deriv fdh_ieee_div_2_4_rv64gc    fdh_div_2_4_rv64gc    
 IEEE754         1
 
+deriv fdh_ieee_div_2_8_rv64gc    fdh_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdh_ieee_div_4_1_rv64gc    fdh_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1364,6 +1421,9 @@ IEEE754         1
 deriv fdq_ieee_div_2_4_rv32gc    fdq_div_2_4_rv32gc    
 IEEE754         1
 
+deriv fdq_ieee_div_2_8_rv32gc    fdq_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdq_ieee_div_4_1_rv32gc    fdq_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1382,6 +1442,9 @@ IEEE754         1
 deriv fdq_ieee_div_2_4_rv64gc    fdq_div_2_4_rv64gc    
 IEEE754         1
 
+deriv fdq_ieee_div_2_8_rv64gc    fdq_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdq_ieee_div_4_1_rv64gc    fdq_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1402,6 +1465,9 @@ IEEE754         1
 deriv fdqh_ieee_div_2_4_rv32gc    fdqh_div_2_4_rv32gc    
 IEEE754         1
 
+deriv fdqh_ieee_div_2_8_rv32gc    fdqh_ieee_div_2_4_rv32gc    
+DIVCOPIES       32'd8
+
 deriv fdqh_ieee_div_4_1_rv32gc    fdqh_div_4_1_rv32gc    
 IEEE754         1
 
@@ -1420,6 +1486,9 @@ IEEE754         1
 deriv fdqh_ieee_div_2_4_rv64gc    fdqh_div_2_4_rv64gc    
 IEEE754         1
 
+deriv fdqh_ieee_div_2_8_rv64gc    fdqh_ieee_div_2_4_rv64gc    
+DIVCOPIES       32'd8
+
 deriv fdqh_ieee_div_4_1_rv64gc    fdqh_div_4_1_rv64gc    
 IEEE754         1
 
@@ -1440,6 +1509,9 @@ IDIV_ON_FPU     1
 deriv f_ieee_div_2_4i_rv32gc f_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv f_ieee_div_2_8i_rv32gc f_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv f_ieee_div_4_1i_rv32gc f_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1458,6 +1530,9 @@ IDIV_ON_FPU     1
 deriv f_ieee_div_2_4i_rv64gc f_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv f_ieee_div_2_8i_rv64gc f_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv f_ieee_div_4_1i_rv64gc f_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
@@ -1477,6 +1552,9 @@ IDIV_ON_FPU     1
 deriv fh_ieee_div_2_4i_rv32gc fh_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv fh_ieee_div_2_8i_rv32gc fh_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv fh_ieee_div_4_1i_rv32gc fh_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1495,6 +1573,9 @@ IDIV_ON_FPU     1
 deriv fh_ieee_div_2_4i_rv64gc fh_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv fh_ieee_div_2_8i_rv64gc fh_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv fh_ieee_div_4_1i_rv64gc fh_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
@@ -1515,6 +1596,9 @@ IDIV_ON_FPU     1
 deriv fd_ieee_div_2_4i_rv32gc fd_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv fd_ieee_div_2_8i_rv32gc fd_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv fd_ieee_div_4_1i_rv32gc fd_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1533,6 +1617,9 @@ IDIV_ON_FPU     1
 deriv fd_ieee_div_2_4i_rv64gc fd_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv fd_ieee_div_2_8i_rv64gc fd_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv fd_ieee_div_4_1i_rv64gc fd_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
@@ -1553,6 +1640,9 @@ IDIV_ON_FPU     1
 deriv fdh_ieee_div_2_4i_rv32gc fdh_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv fdh_ieee_div_2_8i_rv32gc fdh_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv fdh_ieee_div_4_1i_rv32gc fdh_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1571,6 +1661,9 @@ IDIV_ON_FPU     1
 deriv fdh_ieee_div_2_4i_rv64gc fdh_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv fdh_ieee_div_2_8i_rv64gc fdh_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv fdh_ieee_div_4_1i_rv64gc fdh_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
@@ -1591,6 +1684,9 @@ IDIV_ON_FPU     1
 deriv fdq_ieee_div_2_4i_rv32gc fdq_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv fdq_ieee_div_2_8i_rv32gc fdq_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv fdq_ieee_div_4_1i_rv32gc fdq_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1609,6 +1705,9 @@ IDIV_ON_FPU     1
 deriv fdq_ieee_div_2_4i_rv64gc fdq_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv fdq_ieee_div_2_8i_rv64gc fdq_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv fdq_ieee_div_4_1i_rv64gc fdq_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
@@ -1629,6 +1728,9 @@ IDIV_ON_FPU     1
 deriv fdqh_ieee_div_2_4i_rv32gc fdqh_ieee_div_2_4_rv32gc        
 IDIV_ON_FPU     1
 
+deriv fdqh_ieee_div_2_8i_rv32gc fdqh_ieee_div_2_4i_rv32gc 
+DIVCOPIES       32'd8
+
 deriv fdqh_ieee_div_4_1i_rv32gc fdqh_ieee_div_4_1_rv32gc        
 IDIV_ON_FPU     1
 
@@ -1647,6 +1749,9 @@ IDIV_ON_FPU     1
 deriv fdqh_ieee_div_2_4i_rv64gc fdqh_ieee_div_2_4_rv64gc        
 IDIV_ON_FPU     1
 
+deriv fdqh_ieee_div_2_8i_rv64gc fdqh_ieee_div_2_4i_rv64gc 
+DIVCOPIES       32'd8
+
 deriv fdqh_ieee_div_4_1i_rv64gc fdqh_ieee_div_4_1_rv64gc        
 IDIV_ON_FPU     1
 
diff --git a/sim/imperas.ic b/config/rv32gc/imperas.ic
similarity index 94%
rename from sim/imperas.ic
rename to config/rv32gc/imperas.ic
index aee25eabf..c9b8292f2 100644
--- a/sim/imperas.ic
+++ b/config/rv32gc/imperas.ic
@@ -9,6 +9,7 @@
 #--showcommands
 
 # Core settings
+--variant RV32GC # for RV32GC
 --override cpu/priv_version=1.12 
 --override cpu/user_version=20191213
 # arch
@@ -38,11 +39,12 @@
 --override lr_sc_grain=8   # Za64rs requires <=64; we use native word size
 
 # 64 KiB continuous huge pages supported
---override cpu/Svpbmt=T
---override cpu/Svnapot_page_mask=65536
+#--override cpu/Svpbmt=F
+#--override cpu/Svnapot_page_mask=65536
 
-# SV39 and SV48 supported
---override cpu/Sv_modes=768
+# SV32 supported
+--override cpu/Sv_modes=3
+#--showoverrides
 
 --override cpu/Svinval=T
 
@@ -59,7 +61,7 @@
 
 --override cpu/reset_address=0x80000000
 
---override cpu/unaligned=T  # Zicclsm (should be true)
+--override cpu/unaligned=F  # Zicclsm (should be true); NOTE(review): set to F in this rv32gc config - confirm intentional
 --override cpu/ignore_non_leaf_DAU=1
 --override cpu/wfi_is_nop=T
 --override cpu/misa_Extensions_mask=0x0 # MISA not writable
@@ -74,7 +76,7 @@
 --override cpu/PMP_undefined=T
 
 # mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
---override cpu/mstatus_fs_mode=rvfs_write_nz
+--override cpu/mstatus_fs_mode=write_1
 
 # PMA Settings 
 # 'r': read access allowed
diff --git a/config/rv64gc/imperas.ic b/config/rv64gc/imperas.ic
new file mode 100644
index 000000000..fa9b56dfc
--- /dev/null
+++ b/config/rv64gc/imperas.ic
@@ -0,0 +1,117 @@
+# imperas.ic
+# Initialization file for ImperasDV lock step simulation
+# David_Harris@hmc.edu 15 August 2024
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+
+#--mpdconsole
+#--gdbconsole
+#--showoverrides
+#--showcommands
+
+# Core settings
+--override cpu/priv_version=1.12 
+--override cpu/user_version=20191213
+# arch
+--override cpu/mimpid=0x100
+--override cpu/mvendorid=0x602
+--override cpu/marchid=0x24
+--override refRoot/cpu/tvec_align=64
+--override refRoot/cpu/envcfg_mask=1   # dh 1/26/24 this should be deleted when ImperasDV is updated to allow envcfg.FIOM to be written
+
+# bit manipulation
+--override cpu/add_Extensions=B 
+--override cpu/bitmanip_version=1.0.0
+--override cpu/misa_B_Zba_Zbb_Zbs=T
+
+# More extensions
+--override cpu/Zcb=T
+--override cpu/Zicond=T
+--override cpu/Zfh=T
+--override cpu/Zfa=T
+
+# Cache block operations
+--override cpu/Zicbom=T
+--override cpu/Zicbop=T
+--override cpu/Zicboz=T
+--override cmomp_bytes=64  # Zic64b
+--override cmoz_bytes=64   # Zic64b
+--override lr_sc_grain=8   # Za64rs requires <=64; we use native word size
+
+# 64 KiB continuous huge pages supported
+--override cpu/Svpbmt=T
+--override cpu/Svnapot_page_mask=65536
+
+# SV39 and SV48 supported
+--override cpu/Sv_modes=768
+
+--override cpu/Svinval=T
+
+
+#  clarify
+#--override refRoot/cpu/mtvec_sext=F
+
+--override cpu/tval_ii_code=T
+
+#--override cpu/time_undefined=T
+#--override cpu/cycle_undefined=T
+#--override cpu/instret_undefined=T
+#--override cpu/hpmcounter_undefined=T
+
+--override cpu/reset_address=0x80000000
+
+--override cpu/unaligned=T  # Zicclsm (should be true)
+--override cpu/ignore_non_leaf_DAU=1
+--override cpu/wfi_is_nop=T
+--override cpu/misa_Extensions_mask=0x0 # MISA not writable
+--override cpu/Sstc=T
+
+# Enable SVADU hardware update of A/D bits when menvcfg.ADUE=1
+--override cpu/Svadu=T
+#--override cpu/updatePTEA=F
+#--override cpu/updatePTED=F
+
+--override cpu/PMP_registers=16
+--override cpu/PMP_undefined=T
+
+# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
+--override cpu/mstatus_fs_mode=write_1
+
+# PMA Settings 
+# 'r': read access allowed
+# 'w': write access allowed
+# 'x': execute access allowed
+# 'a': aligned access required
+# 'A': atomic instructions NOT allowed (actually USER1 privilege needed)
+# 'P': push/pop instructions NOT allowed (actually USER2 privilege needed)
+# '1': 1-byte accesses allowed
+# '2': 2-byte accesses allowed
+# '4': 4-byte accesses allowed
+# '8': 8-byte accesses allowed
+# '-', space: ignored (use for input string formatting).
+#
+# SVxx Memory 0x0000000000 0x7FFFFFFFFF
+#
+--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0xFFFFFFFFFFFFFFFFFF -attributes " ---a-- ---- " # All memory inaccessible unless defined otherwise
+--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0x7FFFFFFFFF -attributes " ---a-- ---- " # INITIAL
+--callcommand refRoot/cpu/setPMA -lo 0x0000001000 -hi 0x0000001FFF -attributes " r-x-A- 1248 " # BOOTROM
+--callcommand refRoot/cpu/setPMA -lo 0x0000012100 -hi 0x000001211F -attributes " rw-aA- --48 " # SDC
+--callcommand refRoot/cpu/setPMA -lo 0x0002000000 -hi 0x000200FFFF -attributes " rw-aA- 1248 " # CLINT
+--callcommand refRoot/cpu/setPMA -lo 0x000C000000 -hi 0x000FFFFFFF -attributes " rw-aA- --4- " # PLIC
+--callcommand refRoot/cpu/setPMA -lo 0x0010000000 -hi 0x0010000007 -attributes " rw-aA- 1--- " # UART0 error - 0x10000000 - 0x100000FF
+--callcommand refRoot/cpu/setPMA -lo 0x0010060000 -hi 0x00100600FF -attributes " rw-aA- --4- " # GPIO  error - 0x10069000 - 0x100600FF
+--callcommand refRoot/cpu/setPMA -lo 0x0010040000 -hi 0x0010040FFF -attributes " rw-aA- --4- " # SPI   error - 0x10040000 - 0x10040FFF
+--callcommand refRoot/cpu/setPMA -lo 0x0080000000 -hi 0x008FFFFFFF -attributes " rwx--- 1248 " # UNCORE_RAM
+
+# Enable the Imperas instruction coverage
+#-extlib    refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0
+#-override  refRoot/cpu/cv/cover=basic
+#-override  refRoot/cpu/cv/extensions=RV32I
+
+# Add Imperas simulator application instruction tracing
+# uncomment these to provide tracing
+#--verbose --trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange # --traceafter 300000000
+#--override cpu/debugflags=6 --override cpu/verbose=1
+#--override cpu/show_c_prefix=T
+
+# Store simulator output to logfile
+--output imperas.log
diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh
index 91e1d4100..445dc392f 100644
--- a/config/shared/config-shared.vh
+++ b/config/shared/config-shared.vh
@@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN
 
 localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));                  // log_2(NORMSHIFTSZ)
 
+localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF));
+localparam NORMSHIFTSZDRSU = DIVb+1+NF;
+localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU);
+
 // Disable spurious Verilator warnings
 
 /* verilator lint_off STMTDLY */
diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh
index c80b00232..bb036c94d 100644
--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@@ -194,6 +194,8 @@ localparam cvw_t P = '{
   FMALEN : FMALEN,
   NORMSHIFTSZ : NORMSHIFTSZ,
   LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
+  NORMSHIFTSZDRSU : NORMSHIFTSZDRSU,
+  LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU,
   LOGR        : LOGR,
   RK          : RK,
   FPDUR       : FPDUR,
diff --git a/fpga/zsbl/boot.c b/fpga/zsbl/boot.c
index b21c49f48..d9a824a49 100644
--- a/fpga/zsbl/boot.c
+++ b/fpga/zsbl/boot.c
@@ -52,6 +52,42 @@ when 8 bytes are transferred
 
  */
 
+// CRC-16 lookup table (polynomial 0x1021, MSB-first, one entry per byte value) to reduce per-byte processing time
+static const uint16_t crctable[256] = {
+  0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 
+  0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 
+  0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 
+  0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 
+  0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 
+  0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 
+  0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, 
+  0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 
+  0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, 
+  0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 
+  0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, 
+  0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 
+  0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, 
+  0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 
+  0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, 
+  0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 
+  0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 
+  0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 
+  0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 
+  0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, 
+  0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, 
+  0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 
+  0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 
+  0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 
+  0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 
+  0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, 
+  0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 
+  0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, 
+  0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 
+  0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, 
+  0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 
+  0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0 
+};
+
 int disk_read(BYTE * buf, LBA_t sector, UINT count) {
   uint64_t r;
   UINT i, j;
@@ -86,6 +122,7 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
   for (i = 0; i < count; i++) {
     uint16_t crc, crc_exp;
     uint64_t n = 0;
+    uint64_t readCount = 0;
 
     // Wait for data token
     while((r = spi_dummy()) != SD_DATA_TOKEN);
@@ -98,21 +135,45 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
     /*   crc = crc16(crc, x); */
     /* } while (--n > 0); */
 
-    n = 512/8;
-    do {
-      // Send 8 dummy bytes (fifo should be empty)
-      for (j = 0; j < 8; j++) {
+    /* n = 512/8; */
+    /* do { */
+    /*   // Send 8 dummy bytes (fifo should be empty) */
+    /*   for (j = 0; j < 8; j++) { */
+    /*     spi_sendbyte(0xff); */
+    /*   } */
+
+    /*   // Reset counter. Process bytes AS THEY COME IN. */
+    /*   for (j = 0; j < 8; j++) { */
+    /*     while (!(read_reg(SPI_IP) & 2)) {} */
+    /*     uint8_t x = spi_readbyte(); */
+    /*     *p++ = x; */
+    /*     // crc = crc16(crc, x); */
+    /*     crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff; */
+    /*   } */
+    /* } while(--n > 0); */
+
+    n = 512;
+    // Initially fill the transmit fifo
+    for (j = 0; j < 8; j++) {
+      spi_sendbyte(0xff);
+    }
+
+    
+    while (n > 0) {
+      // Wait for bytes to be received
+      while (!(read_reg(SPI_IP) & 2)) {}
+      // Read byte
+      uint8_t x = spi_readbyte();
+      // Send another dummy byte
+      if (n > 8) {
         spi_sendbyte(0xff);
       }
-
-      // Reset counter. Process bytes AS THEY COME IN.
-      for (j = 0; j < 8; j++) {
-        while (!(read_reg(SPI_IP) & 2)) {}
-        uint8_t x = spi_readbyte();
-        *p++ = x;
-        crc = crc16(crc, x);
-      }
-    } while(--n > 0);
+      // Place received byte into memory
+      *p++ = x;
+      // Update CRC16 with fast table based method
+      crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff;
+      n = n - 1;
+    }
     
     // Read CRC16 and check
     crc_exp = ((uint16_t)spi_dummy() << 8);
diff --git a/fpga/zsbl/spi.h b/fpga/zsbl/spi.h
index d2bf1191c..f9e88fa6d 100644
--- a/fpga/zsbl/spi.h
+++ b/fpga/zsbl/spi.h
@@ -1,3 +1,32 @@
+///////////////////////////////////////////////////////////////////////
+// spi.h
+//
+// Written: Jacob Pease jacob.pease@okstate.edu 7/22/2024
+//
+// Purpose: Header file for interfacing with the SPI peripheral
+//
+// 
+//
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the
+// “License”); you may not use this file except in compliance with the
+// License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work
+// distributed under the License is distributed on an “AS IS” BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
 #pragma once
 #ifndef SPI_HEADER
 #define SPI_HEADER
diff --git a/sim/questa/wally.do b/sim/questa/wally.do
index 0f67cee37..80a7bef76 100644
--- a/sim/questa/wally.do
+++ b/sim/questa/wally.do
@@ -148,11 +148,33 @@ if {$FunctCoverageIndex >= 0} {
     set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1"
     set lst [lreplace $lst $FunctCoverageIndex $FunctCoverageIndex]
 }\
+
+set FunctCoverageIndex2 [lsearch -exact $lst "--fcov2"]
+if {$FunctCoverageIndex2 >= 0} {
+    set FunctCoverage 1
+    set riscvISACOVsrc +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source
+
+    set FCdefineINCLUDE_TRACE2COV "+define+INCLUDE_TRACE2COV"
+    set FCdefineCOVER_BASE_RV64I "+define+COVER_BASE_RV64I"
+    set FCdefineCOVER_LEVEL_DV_PR_EXT  "+define+COVER_LEVEL_DV_PR_EXT"
+    # Uncomment various cover statements below to control which extensions get functional coverage
+    set FCdefineCOVER_RV64I "+define+COVER_RV64I"
+    #set FCdefineCOVER_RV64M "+define+COVER_RV64M"
+    #set FCdefineCOVER_RV64A "+define+COVER_RV64A"
+    #set FCdefineCOVER_RV64F "+define+COVER_RV64F"
+    #set FCdefineCOVER_RV64D "+define+COVER_RV64D"
+    #set FCdefineCOVER_RV64ZICSR "+define+COVER_RV64ZICSR"
+    #set FCdefineCOVER_RV64C "+define+COVER_RV64C"
+    set FCdefineIDV_INCLUDE_TRACE2COV "+define+IDV_INCLUDE_TRACE2COV"
+    set FCTRACE2COV "+TRACE2COV_ENABLE=1"
+    set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1"
+    set lst [lreplace $lst $FunctCoverageIndex2 $FunctCoverageIndex2]
+}\
  
 set LockStepIndex [lsearch -exact $lst "--lockstep"]
 # ugh.  can't have more than 9 arguments passed to vsim. why? I'll have to remove --lockstep when running
 # functional coverage and imply it.
-if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0} {
+if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0 || $FunctCoverageIndex2 >= 0} {
     set lockstep 1
 
     # ideally this would all be one or two variables, but questa is having a real hard time
diff --git a/site-setup.sh b/site-setup.sh
index e2affd031..de10a758e 100755
--- a/site-setup.sh
+++ b/site-setup.sh
@@ -11,6 +11,7 @@
 # Must edit these based on your local environment.
 export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu                   # Change this to your Siemens license server for Questa
 export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu                # Change this to your Synopsys license server
+export IMPERASD_LICENSE_FILE=27020@zircon.eng.hmc.edu               # Change this to your Imperas license server
 export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim          # Change this for your path to Questa, excluding bin
 export DC_HOME=/cad/synopsys/SYN                                    # Change this for your path to Synopsys Design Compiler, excluding bin
 export VCS_HOME=/cad/synopsys/vcs/U-2023.03-SP2-4                   # Change this for your path to Synopsys VCS, excluding bin
diff --git a/src/cvw.sv b/src/cvw.sv
index ed0493484..94006274b 100644
--- a/src/cvw.sv
+++ b/src/cvw.sv
@@ -285,6 +285,8 @@ typedef struct packed {
   int LOGCVTLEN;
   int NORMSHIFTSZ;
   int LOGNORMSHIFTSZ;
+  int NORMSHIFTSZDRSU;
+  int LOGNORMSHIFTSZDRSU;
   int FMALEN;
 
 // division constants
diff --git a/src/fpu/divremsqrt/arithrightshift.sv b/src/fpu/divremsqrt/arithrightshift.sv
new file mode 100644
index 000000000..624a54751
--- /dev/null
+++ b/src/fpu/divremsqrt/arithrightshift.sv
@@ -0,0 +1,9 @@
+
+module arithrightshift import cvw::*;  #(parameter cvw_t P) (  // Sign-preserving right shift by a fixed P.LOGR bits
+  input logic signed [P.INTDIVb+3:0] shiftin,   // signed value to shift
+  output logic signed [P.INTDIVb+3:0] shifted   // shiftin >>> P.LOGR, same width as the input
+);
+  assign shifted = $signed(shiftin) >>> P.LOGR; // >>> on a signed operand is an arithmetic shift (sign bit replicated)
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
new file mode 100644
index 000000000..c21267070
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -0,0 +1,110 @@
+///////////////////////////////////////////
+// divremsqrt.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+ module divremsqrt import cvw::*;  #(parameter cvw_t P) (  // Wires together the preprocessor, FSM, iterator and postprocessor instantiated below
+  input  logic                clk, 
+  input  logic                reset, 
+  input  logic [P.FMTBITS-1:0] FmtE,                        // connected to the preprocessor only
+  input  logic                XsE,                          // connected to the FSM only
+  input  logic [P.NF:0]        XmE, YmE,                     // connected to the preprocessor Xm/Ym ports
+  input  logic [P.NE-1:0]      XeE, YeE,                     // connected to the preprocessor Xe/Ye ports
+  input  logic                XInfE, YInfE,                 // connected to the FSM
+  input  logic                XZeroE, YZeroE,               // connected to the FSM; XZeroE also goes to the preprocessor
+  input  logic                XNaNE, YNaNE,                 // connected to the FSM
+  input  logic                FDivStartE, IDivStartE,       // connected to the FSM
+  input  logic                StallM,                       // connected to the FSM and the postprocessor
+  input  logic                FlushE,                       // connected to the FSM
+  input  logic                SqrtE, SqrtM,                 // SqrtM is connected to the postprocessor only
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,             // Funct3E goes to the preprocessor; only Funct3M[1] is used (as RemOpM)
+  input  logic                IntDivE, W64E,
+  output logic                DivStickyM,                   // from the postprocessor
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE, // FSM handshake signals; FDivBusyE/IFDivStartE are also connected to the iterator
+  output logic [P.NE+1:0]      UeM,                          // from the preprocessor
+  output logic [P.DIVb:0]      UmM,                          // from the postprocessor
+  output logic [P.XLEN-1:0]    FIntDivResultM,               // from the postprocessor
+  output logic                 IntDivM,                     // connected to the preprocessor and the postprocessor
+  // integer normalization shifter signals
+  output logic [P.INTDIVb+3:0]          PreResultM,          // from the postprocessor
+  input logic [P.XLEN-1:0]          PreIntResultM,           // note: an input; connected to the postprocessor only
+  output logic [P.DIVBLEN-1:0]       IntNormShiftM           // connected to the preprocessor and the postprocessor
+
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       WZeroE;                       // Early termination flag
+  logic [P.DURLEN:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                       DivStartE;                    // Enable signal for flops during stall; NOTE(review): not referenced elsewhere in this module
+                                                            
+  // Integer div/rem signals                                
+  logic                       BZeroM;                       // Denominator is zero
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts; NOTE(review): not referenced elsewhere in this module
+  logic                       NegQuotM, ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM;   // Special handling for postprocessor; NOTE(review): NegQuotM is not referenced elsewhere in this module
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+
+
+  divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc(                          // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
+    // Int-specific 
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .AM, 
+    .IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);
+
+  fdivsqrtfsm #(P) fdivsqrtfsm(                                  // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific 
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter #(P) fdivsqrtiter(                                // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
+    .FirstU, .FirstUM, .FirstC, .FirstWS(WS), .FirstWC(WC));     // iterator FirstWS/FirstWC ports map to local WS/WC
+
+  divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc(                        // Postprocessor (instance name omits the divremsqrt prefix of the module name)
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
+    .SqrtE, .SqrtM, .SpecialCaseM, 
+    .UmM, .WZeroE, .DivStickyM, 
+    // Int-specific 
+    .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,          // RemOpM is taken from bit 1 of Funct3M
+    .FIntDivResultM,  .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);
+  
+  
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
new file mode 100644
index 000000000..640735bef
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
@@ -0,0 +1,73 @@
+///////////////////////////////////////////
+// divremsqrtdivshiftcalc.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Division shift calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtdivshiftcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NF+2:0]              DivUm,              // divsqrt significand
+  input  logic [P.NE+1:0]              DivUe,              // divsqrt exponent
+  output logic [P.LOGNORMSHIFTSZDRSU-1:0]  DivShiftAmt,        // divsqrt shift amount
+  output logic [P.NORMSHIFTSZDRSU-1:0]     DivShiftIn,         // divsqrt shift input
+  output logic                         DivResSubnorm,      // is the divsqrt result subnormal
+  output logic                         DivSubnormShiftPos  // is the subnormal shift amount non-negative
+);
+
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]         NormShift;          // normalized result shift amount (constant NF)
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]         DivSubnormShiftAmt; // subnormal result shift amount (forced to 0 if negative)
+  logic [P.NE+1:0]                     DivSubnormShift;    // unclamped subnormal shift amount (NF + DivUe)
+
+  // Is the result subnormal?
+  // It is flagged subnormal when DivUe has its top bit set (treated as negative) or when DivUe is exactly zero.
+  assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
+
+  // if the result is subnormal
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
+  //  .00xxxxxxxxxxxxx... << DivUe+NF+1       Exp = +1
+  //  .0000xxxxxxxxxxx... >> 1                Exp = 1
+  // Left shift amount      = DivUe+NF+1-1 = DivUe+NF
+  assign DivSubnormShift    = (P.NE+2)'(P.NF)+DivUe;
+  assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];   // top bit clear => shift amount is zero or positive
+
+  // if the result is normalized
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
+  //  00000000.xxxxxxx... << NF               Exp = DivUe+1
+  //  00000000x.xxxxxx... << NF               Exp = DivUe (extra shift done afterwards)
+  //  00000000xx.xxxxx... << 1?               Exp = DivUe-1 (determined after)
+  // initial left shift amount = NF
+  // NOTE(review): no extra shift for minimally redundant radix 4 is applied here; NormShift is always NF
+  assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF);
+
+  // if the shift amount is negative then don't shift (keep sticky bit)
+  // NOTE(review): no LOGR*DIVCOPIES scaling of an early-termination shift is done in this module, only the clamp below
+  assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0;
+  assign DivShiftAmt        = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
+
+  // pre-shift the divider result for normalization: {NF zeros, DivUm, zero padding up to NORMSHIFTSZDRSU bits}
+  assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}};
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtearlyterm.sv b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
new file mode 100644
index 000000000..464dfdafa
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
@@ -0,0 +1,27 @@
+module divremsqrtearlyterm import cvw::*;  #(parameter cvw_t P) (  // early-termination detect: flags a zero residual
+  input  logic [P.DIVb+3:0]    WS, WC,            // Q4.DIVb  residual words; the tests below operate on WS + WC
+  input  logic [P.DIVb+3:0]    D,                 // Q4.DIVb  divisor
+  input  logic [P.DIVb:0]      FirstUM,   // U1.DIVb
+  input  logic [P.DIVb+1:0]    FirstC,            // Q2.DIVb
+  input  logic                 SqrtE,             // selects the square-root form of F (1) or the divide form (0)
+  output logic                 WZeroE             // residual (or, for radix 2, residual + F) was detected as zero
+);
+  logic weq0E;  // output of the WS/WC zero detector
+  aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);  // presumably weq0E = (WS + WC == 0); aplusbeq0 is defined elsewhere
+  if (P.RADIX == 2) begin: R2EarlyTerm  // radix 2 additionally tests WS + WC + F
+    logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;  // selected F, and its sqrt / divide candidates
+    logic [P.DIVb+2:0] FirstK;                         // see assignment below
+    logic wfeq0E;                                      // output of the WSF/WCF zero detector
+    logic [P.DIVb+3:0] WCF, WSF;                       // carry-save outputs of WS + WC + FZeroE
+
+    assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));  // keep bit i of {1, FirstC} only where bit i-1 is 0
+    assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0};    // F for square root
+    assign FZeroDivE =  D << 1;                                    // F for divide
+    mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
+    csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
+    aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
+    assign WZeroE = weq0E|wfeq0E;  // either zero test asserts WZeroE
+  end else begin  // other radices: only the plain WS + WC test is used
+    assign WZeroE = weq0E;
+  end 
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
new file mode 100644
index 000000000..9ca4ef503
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
@@ -0,0 +1,83 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtcycles.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
+// Modified: 18 April 2022
+//
+// Purpose: Determine number of cycles for divsqrt
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtcycles import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] FmtE,              // floating-point format select
+  input  logic                 SqrtE,             // square-root operation (not referenced inside this module)
+  input  logic                 IntDivE,           // integer division selects IntResultBitsE instead of the FP bit count
+  input  logic [P.DIVBLEN-1:0] IntResultBitsE,    // number of result bits needed for integer division
+  output logic [P.DURLEN:0]  CyclesE              // ceil(ResultBitsE / P.RK)
+);
+
+  logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // fraction bits of the format, FP result bits, selected result bits
+
+  /* verilator lint_off WIDTH */
+  if (P.FPSIZES == 1)
+    assign Nf = P.NF;
+  else if (P.FPSIZES == 2)
+    always_comb
+      case (FmtE)
+        1'b0: Nf = P.NF1;
+        1'b1: Nf = P.NF;
+      endcase
+  else if (P.FPSIZES == 3)
+    always_comb
+      case (FmtE)
+        P.FMT:   Nf = P.NF;
+        P.FMT1:  Nf = P.NF1;
+        P.FMT2:  Nf = P.NF2; 
+        default: Nf = 'x; // shouldn't happen
+      endcase
+  else if (P.FPSIZES == 4)  
+    always_comb
+      case(FmtE)
+        P.S_FMT: Nf = P.S_NF;
+        P.D_FMT: Nf = P.D_NF;
+        P.H_FMT: Nf = P.H_NF;
+        P.Q_FMT: Nf = P.Q_NF;
+      endcase 
+
+  // Cycle logic
+  // P.DIVCOPIES = k. P.LOGR = log(R) = r.  P.RK = rk.  
+  // Integer division needs p fractional + r integer result bits
+  // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
+  // FP Sqrt needs at least Nf fractional bits and 2 guard/round bits.  NOTE(review): the code below uses the same Nf + 2 + r count for sqrt.
+  // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
+
+  always_comb begin 
+    FPResultBitsE = Nf + 2 + P.LOGR; // Nf fraction bits + 2 guard/round bits + LOGR bits for one integer digit
+
+    if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
+    else               ResultBitsE = FPResultBitsE;
+
+    CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
+  end 
+  /* verilator lint_on WIDTH */
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
new file mode 100644
index 000000000..fe1207252
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
@@ -0,0 +1,79 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtexpcalc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Exponent calculation for divide and square root
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtexpcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] Fmt,       // floating-point format select (chooses Bias)
+  input  logic [P.NE-1:0]      Xe, Ye,    // input exponents
+  input  logic                 Sqrt,      // 1: output the square-root exponent, 0: the division exponent
+ input  logic [P.DIVBLEN-1:0] ell, m,    // leading-zero counts for X and Y, used to correct the exponents for subnormal inputs
+  output logic [P.NE+1:0]      Ue         // result exponent
+  );
+  
+  logic [P.NE-2:0] Bias;   // bias of the selected format
+  logic [P.NE+1:0] SXExp;  // Xe - ell - P.BIAS
+  logic [P.NE+1:0] SExp;   // square-root exponent
+  logic [P.NE+1:0] DExp;   // division exponent
+
+  // Determine exponent bias according to the format
+  
+  if (P.FPSIZES == 1) begin
+    assign Bias = (P.NE-1)'(P.BIAS); 
+
+  end else if (P.FPSIZES == 2) begin
+    assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); 
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (Fmt)
+        P.FMT: Bias  =  (P.NE-1)'(P.BIAS);
+        P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
+        P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
+        default: Bias = 'x;
+      endcase
+
+  end else if (P.FPSIZES == 4) begin        
+  always_comb
+    case (Fmt)  // NOTE(review): literal encodings are used here rather than P.*_FMT constants; confirm they match
+      2'h3: Bias =  (P.NE-1)'(P.Q_BIAS);
+      2'h1: Bias =  (P.NE-1)'(P.D_BIAS);
+      2'h0: Bias =  (P.NE-1)'(P.S_BIAS);
+      2'h2: Bias =  (P.NE-1)'(P.H_BIAS);
+    endcase
+  end
+
+  // Square root exponent = (Xe - l - P.BIAS) / 2 + Bias; l accounts for subnorms.  The divide by 2 is an arithmetic right shift (top bit replicated).
+  assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);  // NOTE(review): subtracts P.BIAS, not the per-format Bias; presumably Xe uses the P.BIAS bias - confirm
+  assign SExp  = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
+  
+  // division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
+  assign DExp  = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); 
+
+  // Select square root or division exponent
+  assign Ue = Sqrt ? SExp : DExp;
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
new file mode 100644
index 000000000..87b2ccd0b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
@@ -0,0 +1,116 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtpostproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk, reset,
+  input  logic                 StallM,            // the E/M register below is enabled by ~StallM
+  input  logic [P.DIVb+3:0]    WS, WC,            // Q4.DIVb
+  input  logic [P.DIVb+3:0]    D,                 // Q4.DIVb
+  input  logic [P.DIVb:0]      FirstU, FirstUM,   // U1.DIVb
+  input  logic [P.DIVb+1:0]    FirstC,            // Q2.DIVb
+  input  logic                 SqrtE,
+  input  logic                 SqrtM, SpecialCaseM, 
+  input  logic [P.XLEN-1:0]    AM,                // U/Q(XLEN.0)
+  input  logic                 RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM, // SIGNOVERFLOWM, ZeroDiffM, IntDivM are not referenced in this module
+  input  logic [P.DIVBLEN-1:0] IntNormShiftM,     // shift amount for the integer normalization shifter
+  input  logic [P.XLEN-1:0]    PreIntResultM,     // NOTE(review): never read here; a same-named local in intpostproc shadows it
+  output logic [P.DIVb:0]      UmM,               // U1.DIVb result significand
+  output logic                 WZeroE,            // early-termination (exact result) flag from divremsqrtearlyterm
+  output logic                 DivStickyM,        // result is inexact and not a special case
+  output logic [P.XLEN-1:0]    FIntDivResultM,     // U/Q(XLEN.0)
+  output logic [P.INTDIVb+3:0]    PreResultM      // NOTE(review): never driven here; a same-named local in intpostproc shadows it
+
+);
+  
+  logic [P.DIVb+3:0]         Sum;              // WC + WS
+  logic [P.INTDIVb+3:0]         W;             // output of the arithrightshift instance (integer path only)
+  logic [P.DIVb:0]           PreUmM;           // FirstU or FirstUM, chosen by NegStickyM
+  logic                      NegStickyM;       // top bit of Sum
+  logic                      weq0E, WZeroM;    // weq0E is not referenced below; WZeroM is WZeroE registered into M
+  logic [P.XLEN-1:0]         IntDivResultM;    // integer result after special-case handling
+  logic                      NegQuotM; // Integer quotient is negative (assigned in intpostproc, not otherwise used here)
+
+  //////////////////////////
+  // Execute Stage: Detect early termination for an exact result
+  //////////////////////////
+
+  // check for early termination on an exact result. 
+  divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS, .WZeroE);
+  
+
+  //////////////////////////
+  // E/M Pipeline register
+  //////////////////////////
+ 
+  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
+
+  //////////////////////////
+  // Memory Stage: Postprocessing
+  //////////////////////////
+
+  //  If the result is not exact, the sticky should be set
+  assign DivStickyM = ~WZeroM & ~SpecialCaseM; 
+
+  // Determine if sticky bit is negative *** Full sum only needed for Integer
+  assign Sum = WC + WS;
+  assign NegStickyM = Sum[P.DIVb+3];
+  mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
+  mux2 #(P.DIVb+1)    ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);   // the second input (PreUmM << 1) is the square-root case
+
+   // Integer quotient or remainder correction, normalization, and special cases
+  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
+    logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; // NormRemM and NormQuotM are not referenced below
+    logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM; // NOTE(review): PreResultM and PreIntResultM shadow the module ports of the same names
+    logic [P.INTDIVb+3:0] DTrunc, SumTrunc;  // upper INTDIVb+4 bits of D and Sum
+
+    assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
+    assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];
+    arithrightshift #(P) rshift(SumTrunc, W);
+
+    assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
+
+    // Integer remainder: sticky and sign correction muxes
+    assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
+    mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM); // add D back when the residual is negative
+
+    // Select quotient or remainder and do normalization shift
+    mux2 #(P.INTDIVb+4)    presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
+    intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
+    mux2 #(P.INTDIVb+4)    preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM); // negate when AsM ^ (BsM & ~RemOpM)
+
+    divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,PreIntResultM,IntDivResultM);
+    // sign extend result for W64
+    if (P.XLEN==64) begin
+      mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0], 
+        {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
+        W64M, FIntDivResultM);
+    end else 
+      assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
new file mode 100644
index 000000000..f39eb7ed5
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
@@ -0,0 +1,250 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtpreproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 IFDivStartE,                    // enable for every register in this module
+  input  logic [P.NF:0]        Xm, Ym,      // Floating-point significands
+  input  logic [P.NE-1:0]      Xe, Ye,      // Floating-point exponents
+  input  logic [P.FMTBITS-1:0] FmtE,                           // floating-point format select
+  input  logic                 SqrtE,                          // square-root operation
+  input  logic                 XZeroE,                         // FP numerator is zero
+  input  logic [2:0]           Funct3E,                        // bit 0 clear => signed integer op; bit 1 => remainder op
+  output logic [P.NE+1:0]      UeM,         // biased exponent of result
+  output logic [P.DIVb+3:0]    X, D,        // Q4.DIVb
+  // Int-specific
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU 
+  input  logic                 IntDivE, W64E,
+  // Outputs (the int-specific M-stage outputs are only driven when P.IDIV_ON_FPU is set)
+  output logic                 ISpecialCaseE,                  // integer special case: divide by zero or A < B
+  output logic [P.DURLEN:0]  CyclesE,                          // number of iteration cycles (from cyclecalc)
+  output logic [P.DIVBLEN-1:0] IntNormShiftM,                  // registered integer normalization shift amount
+  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM, // NOTE(review): SIGNOVERFLOWM and ZeroDiffM are never driven in this module
+  output logic                 AsM, BsM, BZeroM,
+  output logic [P.XLEN-1:0]    AM
+);
+
+  logic [P.DIVb:0]             Xnorm, Dnorm;                        // Inputs shifted left by their leading-zero counts
+  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
+  logic [P.NE+1:0]             UeE;                                 // Result Exponent (FP only)
+  logic [P.DIVb:0]             IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
+  logic [P.DIVBLEN-1:0]        mE, ell;                             // Leading zeros of inputs
+  logic [P.DIVBLEN-1:0]        IntResultBitsE;                      // bits in integer result
+  logic                        NumerZeroE;                          // Numerator is zero (X or A)
+  logic                        SIGNOVERFLOWE;                       // tied to 0 below; not registered or used elsewhere here
+  logic                        AZeroE, BZeroE;                      // A or B is Zero for integer division
+  logic                        SignedDivE;                          // signed division
+  logic                        AsE, BsE;                            // Signs of integer inputs
+  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic                        ALTBE;                               // A less than B (sign bit of ZeroDiff)
+  logic                        EvenExp;                             // see assignment in the FP preprocessing section
+
+  logic [$clog2(P.RK):0] RightShiftX;                               // integer right shift to reach a digit boundary
+  logic [P.DIVBLEN-1:0] ZeroDiff, p;                                // mE - ell, and the same value clamped to 0 when A < B
+
+
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
+
+  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
+    logic [P.XLEN-1:0] BE, PosA, PosB;
+
+    // Extract inputs, signs, zero, depending on W64 mode if applicable
+    assign SignedDivE = ~Funct3E[0];
+
+    // Source handling
+    if (P.XLEN==64) begin // 64-bit, supports W64
+      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
+    end else begin // 32 bits only
+      assign AE = ForwardedSrcAE;
+      assign BE = ForwardedSrcBE;
+    end
+    assign AZeroE = ~(|AE);
+    assign BZeroE = ~(|BE);
+    assign AsE = AE[P.XLEN-1] & SignedDivE;
+    assign BsE = BE[P.XLEN-1] & SignedDivE; 
+
+    // Force integer inputs to be positive
+    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
+    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
+
+    // Select integer or floating point inputs
+    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
+    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
+    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+  end else begin // Int not supported
+    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
+    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
+    assign NumerZeroE = XZeroE;
+  end
+
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
+  // count leading zeros for Subnorm FP and to normalize integer inputs
+  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
+  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);
+
+  // Normalization shift: shift leading one into most significant bit
+  assign Xnorm = (IFX << ell);
+  assign Dnorm = (IFD << mE); 
+
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  //  Determine DivXShifted (X shifted to digit boundary)
+  //  and IntResultBitsE (number of integer result bits)
+  //////////////////////////////////////////////////////
+
+  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
+
+  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
+
+    // calculate number of result bits
+    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
+    assign ALTBE = ZeroDiff[P.DIVBLEN-1];  // A less than B (A has more leading zeros)
+    assign SIGNOVERFLOWE = 1'b0;
+
+    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);          
+
+    /* verilator lint_off WIDTH */
+    assign IntResultBitsE = P.LOGR + p;  // Total number of result bits (r integer bits plus p fractional bits)
+
+    /* verilator lint_on WIDTH */
+
+    // Integer special cases (terminate immediately)
+    assign ISpecialCaseE = BZeroE | ALTBE;
+
+    // calculate right shift amount RightShiftX to complete in discrete number of steps
+    if (P.RK > 1) begin // more than 1 bit per cycle
+
+      /* verilator lint_off WIDTH */
+      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
+      assign DivXShifted = DivX >> RightShiftX;                     // shift X by up to R*K-1 to complete in n steps
+      /* verilator lint_on WIDTH */
+    end else begin // radix 2 1 copy doesn't require shifting
+      assign DivXShifted = DivX;
+      assign RightShiftX = 0;
+    end
+  end else begin
+    assign ISpecialCaseE = 0;
+  end
+
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // Extend to Q4.b format
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
+  //////////////////////////////////////////////////////
+
+
+  // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
+  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
+  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
+  // Now (X-1) is negative.  Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
+  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
+  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
+  // Subtracting 2 is equivalent to adding 1110.  Subtracting 4 is equivalent to adding 1100.  Prepend leading 1s to do a free subtraction.
+  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
+  // Radix      Exponent odd          Exponent Even
+  // 2          x-2 = 2(x/2 - 1)      x/2 - 2 = 2(x/4 - 1)
+  // 4          2(x)-4 = 4(x/2 - 1))  2(x/2)-4 = 4(x/4 - 1)
+  // Summary: PreSqrtX = r(x/2or4 - 1)
+
+  // NOTE(review): the mux below only forms X/2 or X/4 with zero integer bits; the "-1" and "*r" steps described above are not applied in this module
+  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
+  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
+
+/*  
+  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
+  // This saves one bit in DIVb because there is no initial right shift.
+  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
+  // That is an optimization for another day.
+  if (P.RADIX == 2) begin
+    logic [P.DIVb:0] PreSqrtX;    // U1.DIVb
+    mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
+    assign SqrtX = {3'b111, PreSqrtX};                          // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
+  end else begin
+    logic [P.DIVb+1:0] PreSqrtX;  // U2.DIVb
+    mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
+    assign SqrtX = {2'b11, PreSqrtX};                     // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
+  end
+*/
+
+  // Initialize X for division or square root
+  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);                    
+
+  //////////////////////////////////////////////////////
+  // Select integer or floating-point operands
+  //////////////////////////////////////////////////////
+  if (P.IDIV_ON_FPU) begin
+    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
+
+  // Divisor register
+  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
+
+  // Floating-point exponent
+  divremsqrtfdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
+  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
+
+  // Number of FSM cycles (to FSM)
+  divremsqrtfdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
+
+  if (P.IDIV_ON_FPU) begin:intpipelineregs
+    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
+    logic               RemOpE;
+
+    /* verilator lint_off WIDTH */
+    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  rn = Cycles * r * k - r ***explain
+    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
+    /* verilator lint_on WIDTH */
+    assign RemOpE = Funct3E[1];
+    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
+
+    // pipeline registers, all enabled by IFDivStartE
+    flopen #(1)          mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
+    flopen #(1)         altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
+    flopen #(1)        bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
+    flopen #(1)        asignreg(clk, IFDivStartE, AsE,      AsM);
+    flopen #(1)        bsignreg(clk, IFDivStartE, BsE,      BsM);
+    flopen #(P.DIVBLEN)   nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); 
+    flopen #(P.XLEN)    srcareg(clk, IFDivStartE, AE,       AM);
+    if (P.XLEN==64) 
+      flopen #(1)        w64reg(clk, IFDivStartE, W64E,     W64M);
+  end
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..dc480637b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,183 @@
+
+///////////////////////////////////////////
+// flags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtflags import cvw::*;  #(parameter cvw_t P) (
+  input  logic                Xs,                     // X sign
+  input  logic [P.FMTBITS-1:0] OutFmt,                 // output format
+  input  logic                InfIn,                  // is an Inf input being used
+  input  logic                XInf, YInf,             // inputs are infinity
+  input  logic                NaNIn,                  // is a NaN input being used
+  input  logic                XSNaN, YSNaN,           // inputs are signaling NaNs
+  input  logic                XZero, YZero,           // inputs are zero
+  input  logic [P.NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                     // normalized exponent
+  // rounding
+  input  logic                Plus1,                  // do you add one for rounding (not read in this module)
+  input  logic                Round, Guard, Sticky,   // bits used to determine rounding
+  input  logic                UfPlus1,                // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                DivOp,                  // divide/sqrt operation?
+  input  logic                Sqrt,                   // Sqrt?
+  // flags
+  output logic                DivByZero,              // divide by zero flag
+  output logic                Overflow,               // overflow flag to select result
+  output logic                Invalid,                // invalid flag to select the result
+  output logic [4:0]          PostProcFlg             // flags: {Invalid, DivByZero, Overflow, Underflow, Inexact}
+);
+
+  logic               SigNaN;         // is an input a signaling NaN
+  logic               Inexact;        // final inexact flag
+  logic               FpInexact;      // floating point inexact flag
+  logic               DivInvalid;     // invalid operand combination for divide/sqrt
+  logic               Underflow;      // Underflow flag
+  logic               ResExpGteMax;   // is the result greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent
+  // of the selected output format.  FullRe is P.NE+2 bits wide; for a format whose
+  // exponent field is e bits wide the test below is:
+  //      - the low e bits of FullRe are all ones (&FullRe[e-1:0]), or
+  //      - any FullRe bit from position e up to bit P.NE (P.Q_NE in the four-format
+  //        case) is set, i.e. the value does not fit in e bits
+  // The top bit FullRe[P.NE+1] is not examined here; the Overflow assignment further
+  // down uses it to exclude negative exponents.
+  //
+  // The parameter that supplies e depends on P.FPSIZES and OutFmt:
+  //      - 1 format : P.NE, OutFmt is not consulted
+  //      - 2 formats: OutFmt set selects P.NE, OutFmt clear selects P.NE1
+  //      - 3 formats: P.FMT / P.FMT1 / P.FMT2 select P.NE / P.NE1 / P.NE2,
+  //                   any other encoding yields x
+  //      - 4 formats: P.Q_FMT / P.D_FMT / P.S_FMT / P.H_FMT select the matching *_NE
+  if (P.FPSIZES == 1) begin
+      assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+
+  end else if (P.FPSIZES == 2) begin    
+      assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+              P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+              P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
+              default: ResExpGteMax = 1'bx;
+          endcase
+
+  end else if (P.FPSIZES == 4) begin        
+      always_comb
+          case (OutFmt)
+              P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+              P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+              P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+              P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
+          endcase
+  end
+
+
+  // calculate overflow flag:
+  //      - the result exponent is greater than or equal to the max exponent (ResExpGteMax)
+  //      - and the exponent isn't negative (~FullRe[P.NE+1])
+  //      - and no input is infinity or NaN, and this is not a divide by zero
+  //        (DivByZero is this module's own output, assigned further down)
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //   exponent condition, any of:
+  //      - FullRe is negative (FullRe[P.NE+1] set)
+  //      - FullRe == 0 (the result is subnormal)
+  //      - FullRe == 1 with Me == 0 (the result is normal and rounded from a subnormal),
+  //        unless UfPlus1 & Guard (given an unbounded exponent the result would round too)
+  //   and the result is not exact (Round | Sticky | Guard)
+  //   and no input is infinity or NaN, and neither DivByZero nor Invalid is raised
+  // previous form: assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  // NOTE(review): FpInexact below is masked by XZero but Underflow is not; confirm that
+  //               Round/Sticky/Guard cannot be set for a zero X, else Underflow may be raised without Inexact.
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - masked when an input is Inf, NaN or X is zero, or when DivByZero/Invalid is raised
+  // previous form: assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
+
+  // FpInexact is the only inexact term computed in this module, so it is forwarded
+  // unchanged as the final Inexact flag.
+  //
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) any input is a signaling NaN (regardless of DivOp)
+  //   2) division (DivOp & ~Sqrt): Inf / Inf or 0 / 0
+  //   3) square root (DivOp & Sqrt): X is negative and nonzero, and no input is NaN
+
+  
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+  
+  // invalid operand combinations for division and square root
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0 (never set for sqrt)
+  //  - don't set flag if an input is NaN or Inf (IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);  
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - bit order, MSB to LSB: Invalid, DivByZero, Overflow, Underflow, Inexact
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtintspecialcase.sv b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
new file mode 100644
index 000000000..ff1519aad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
@@ -0,0 +1,15 @@
+module divremsqrtintspecialcase import cvw::*; #(parameter cvw_t P) (
+    input logic BZeroM,RemOpM, ALTBM,            // divide by zero; presumably remainder-op select (per name); numerator is small
+    input logic [P.XLEN-1:0] AM,                 // value returned when RemOpM is set in either special case
+    input  signed [P.INTDIVb+3:0] PreIntResultM, // regular result; only the low XLEN bits are used
+    output logic [P.XLEN-1:0] IntDivResultM      // selected integer result
+);
+  always_comb begin
+    IntDivResultM = PreIntResultM[P.XLEN-1:0];   // default: neither special case applies
+    if (BZeroM) begin                            // divide by zero has priority over ALTBM
+      if (RemOpM) IntDivResultM = AM; else IntDivResultM = {(P.XLEN){1'b1}};
+    end else if (ALTBM) begin                    // numerator is small
+      if (RemOpM) IntDivResultM = AM; else IntDivResultM = 0;
+    end
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtlzc.sv b/src/fpu/divremsqrt/divremsqrtlzc.sv
new file mode 100644
index 000000000..1fa14405b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtlzc.sv
@@ -0,0 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Leading Zero Counter
+// 
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtlzc #(parameter WIDTH = 1) (
+  input  logic [WIDTH-1:0]            num,    // number to count the leading zeroes of
+  output logic [$clog2(WIDTH)-1:0]  ZeroCnt // the number of leading zeroes
+);
+  integer zeros;  // zero bits counted above the leading one
+  logic   found;  // set once a bit that is not zero has been met
+  always_comb begin
+    zeros = 0; found = 1'b0;
+    for (int b = WIDTH-1; b >= 0; b--)  // walk from the MSB down; counting stops at the first non-zero bit
+      if (~found & ~num[b]) zeros = zeros + 1; else found = 1'b1;
+    ZeroCnt = zeros[$clog2(WIDTH)-1:0]; // an all-zero input leaves zeros == WIDTH, truncated to the port width
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtnormshift.sv b/src/fpu/divremsqrt/divremsqrtnormshift.sv
new file mode 100644
index 000000000..4fc51b4ad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtnormshift.sv
@@ -0,0 +1,81 @@
+///////////////////////////////////////////
+// normshift.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: normalization shifter
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+    // convert shift
+    //      fp -> int:  | `XLEN  zeros      |     Mantissa      | 0's if necessary | << CalcExp
+    //          process:
+    //              - start - CalcExp = 1 + XExp - Largest Bias
+    //                  | `XLEN  zeros      |     Mantissa      | 0's if necessary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac          | 0's if necessary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if necessary     |
+    //                  |     keep          |
+    //
+    //      fp -> fp:
+    //          - if result is subnormal or underflowed:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | << NF+CalcExp-1
+    //          process:
+    //             - start
+    //                 |     mantissa      | 0's |
+    //
+    //             - shift right by NF-1 (NF-1)
+    //                 |    `NF-1  zeros   |     mantissa      | 0's |
+    //
+    //             - shift left by CalcExp = XExp - Largest bias + new bias
+    //                 |   0's  |     mantissa      |     0's      |
+    //                 |       keep      |
+    //
+    //          - if the input is subnormal:
+    //                 |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+    //
+    //      int -> fp: |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+
+    // fma shift
+    //      |   00   |           Sm           | << LZA output
+    //             .
+    //      - two extra bits so we can correct for an LZA error of 1 or 2
+
+    // divsqrt shift
+    //      | Nf 0's |           Qm           | << calculated shift amount
+    //        .
+
+module divremsqrtnormshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.LOGNORMSHIFTSZDRSU-1:0]  ShiftAmt,   // shift amount
+  input  logic [P.NORMSHIFTSZDRSU-1:0]     ShiftIn,    // number to be shifted
+  output logic [P.NORMSHIFTSZDRSU-1:0]     Shifted     // shifted result
+);
+  // logical left shift: zeros enter at the LSB, bits moved past the MSB are discarded
+  always_comb Shifted = ShiftIn << ShiftAmt;
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..661e48c81
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,177 @@
+///////////////////////////////////////////
+// postprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
+  // general signals
+  input logic                             Xs, Ys,     // input signs
+  input logic  [P.NF:0]                    Xm, Ym,     // input mantissas
+  input logic  [2:0]                      Frm,        // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+  input logic  [P.FMTBITS-1:0]             Fmt,        // floating-point format select; also used as the output format
+  input logic  [3:0]                      OpCtrl,     // operation select; only bit 0 (sqrt) is read here
+  input logic                             XZero, YZero,        // inputs are zero
+  input logic                             XInf, YInf,          // inputs are infinity
+  input logic                             XNaN, YNaN,          // inputs are NaN
+  input logic                             XSNaN, YSNaN,        // inputs are signaling NaNs
+  input logic  [1:0]                      PostProcSel,         // select result to be written to fp register
+  // divide signals
+  input logic                             DivSticky,  // divider sticky bit
+  input logic  [P.NE+1:0]                  DivUe,      // divsqrt exponent
+  input logic  [P.NF+2:0]                  DivUm,      // divsqrt significand
+  // integer signals (kept on the interface; not read by any logic in this module)
+  input logic  [P.DIVBLEN-1:0]             IntNormShiftM, // integer normalization left-shift amount (after pre-shifting right)
+  input logic  [P.INTDIVb+3:0]          PreResultM, // integer result to be shifted
+  input logic                              IntDivM,
+  // final results
+  output logic [P.FLEN-1:0]                PostProcRes,// postprocessor final result
+  output logic [4:0]                      PostProcFlg, // postprocessor flags
+  output logic [P.XLEN-1:0]  PreIntResultM // normalized integer result; NOTE(review): has no driver in this module
+  );
+
+  // Post-processing for the standalone divide/sqrt unit.  The divider's exponent and
+  // significand (DivUe, DivUm) pass through four stages:
+  //   1) normalization: divremsqrtdivshiftcalc -> divremsqrtnormshift -> divremsqrtshiftcorrection
+  //   2) rounding:      divremsqrtroundsign, divremsqrtround
+  //   3) flags:         divremsqrtflags
+  //   4) result select: divremsqrtspecialcase, which drives PostProcRes
+  // Each submodule that has a DivOp port gets it tied to 1'b1, so the datapath always
+  // behaves as divide/sqrt regardless of PostProcSel.
+  // NOTE(review): IntNormShiftM, PreResultM and IntDivM are never read and PreIntResultM
+  // is never assigned in this module; confirm that no parent logic depends on
+  // PreIntResultM being driven from here.
+
+  // general signals
+  logic                       Rs;         // result sign
+  logic [P.NF-1:0]             Rf;         // Result fraction
+  logic [P.NE-1:0]             Re;         // Result exponent
+  logic                       Ms;         // norMalized sign
+  logic [P.NORMSHIFTSZDRSU-1:0]    Mf;         // norMalized fraction
+  logic [P.NE+1:0]             Me;         // normalized exponent
+  logic [P.NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
+  logic                       UfPlus1;    // do you add one (for determining underflow flag)
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt;   // normalization shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn;    // input to normalization shift
+  logic [P.NORMSHIFTSZDRSU-1:0]    Shifted;    // the output of the normalized shifter (before shift correction)
+  logic                       Plus1;      // add one to the final result?
+  logic                       Overflow;   // overflow flag used to select results
+  logic                       Invalid;    // invalid flag used to select results
+  logic                       Guard, Round, Sticky; // bits needed to determine rounding
+  logic [P.FMTBITS-1:0]        OutFmt;     // output format
+  // division signals
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt;        // divsqrt shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]    DivShiftIn;         // divsqrt shift input
+  logic [P.NE+1:0]             Ue;                 // divsqrt corrected exponent after correction shift
+  logic                       DivByZero;          // divide by zero flag
+  logic                       DivResSubnorm;      // is the divsqrt result subnormal
+  logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+  // readability signals
+  logic                       Sqrt;       // is the divsqrt operation sqrt
+  logic                       DivOp;      // divider operation
+  logic                       InfIn;      // are any of the inputs infinity
+  logic                       NaNIn;      // are any of the inputs NaN
+
+  // signals to help readability
+  // DivOp is decoded from PostProcSel but is not consumed below: the submodules
+  // receive a constant 1'b1 on their DivOp ports instead.
+  assign DivOp = (PostProcSel == 2'b01);
+  assign Sqrt =  OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf;
+  assign NaNIn = XNaN|YNaN;
+
+  // choose the output format
+  //      - every format configuration of this unit uses Fmt as the output precision
+  //      - assigned unconditionally so OutFmt is also driven when P.FPSIZES == 1
+  assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting
+
+  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  // the divide/sqrt shift amount and shift input feed the normalization shifter directly
+  assign ShiftAmt = DivShiftAmt;
+  assign ShiftIn = DivShiftIn;
+
+  // main normalization shift
+  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // correct for divsqrt error
+  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+
+  // calculate result sign used in rounding unit
+  divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);
+
+  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
+      .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // the result sign is the sign computed for rounding (Ms), unchanged
+  assign Rs = Ms;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // DivByZero, Overflow and Invalid are returned separately from PostProcFlg so that
+  // the result selection below can use them
+  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
+              .Xs, .OutFmt, .Sqrt,
+              .NaNIn, .Round, .DivByZero,
+              .Guard, .Sticky, .UfPlus1,.DivOp(1'b1), .FullRe, .Plus1,
+              .Me, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // specialcase receives the rounded fields (Rs, Re, Rf) together with the operand
+  // classification and exception indicators, and drives PostProcRes
+  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero, 
+      .Frm, .OutFmt, .XNaN, .YNaN,  
+      .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+      .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..428288783
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,268 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu, me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
+  input  logic [2:0]              Frm,                // rounding mode
+  input  logic                    Ms,                 // normalized sign
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Mf,                 // normalized fraction
+  // divsqrt
+  input  logic                    DivOp,              // is a division operation being done
+  input  logic                    DivSticky,          // divsqrt sticky bit
+  input  logic [P.NE+1:0]          Ue,                 // the divsqrt calculated exponent
+  // outputs
+  output logic [P.NE+1:0]          Me,                 // exponent fed to the rounding adder (driven directly from Ue)
+  output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
+  output logic [P.NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]          Re,                 // Result exponent
+  output logic [P.NF-1:0]          Rf,                 // Result fraction
+  output logic                    Sticky,             // sticky bit
+  output logic                    Plus1,              // do you add one to the final result
+  output logic                    Round, Guard        // bits needed to calculate rounding
+);
+  // Purpose: pick guard/round/sticky/lsb out of Mf for the selected OutFmt, decide Plus1 from Frm, and add it to {Me, top NF bits of Mf}
+  logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
+  logic           NormSticky;         // OR of the Mf bits below the round bit of the selected format
+  logic [P.NF-1:0] RoundFrac;          // top NF bits of Mf, i.e. the fraction before the rounding increment is added
+  logic           FpGuard, FpRound;   // floating point round/guard bits
+  logic           FpLsbRes;           // least significant bit of floating point result
+  logic           LsbRes;             // lsb of result
+  logic           CalcPlus1;          // calculated plus1
+  logic           FpPlus1;            // do you add one to the fp result (same value as Plus1)
+  logic [P.FLEN:0] RoundAdd;           // how much to add to the result
+
+// what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+//  NOTE(review): XLEN is not referenced anywhere in this module; the note above looks inherited from a rounder that also handles integers - confirm
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  //      {Guard, Round|Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LsbRes = 1)
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //         - plus 1 otherwise
+
+  //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to -infinity
+  //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to infinity
+  //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  //  round to nearest max magnitude
+  //      {Guard, Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //         - Plus 1 otherwise
+
+
+  // determine what format the final result is in: int or fp
+
+  // sticky bit calculation: OR together every Mf bit below the round bit of the selected format
+  if (P.FPSIZES == 1) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 2) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+
+  end else if (P.FPSIZES == 3) begin
+
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 4) begin // NOTE(review): unlike the branches above, bits [NORMSHIFTSZDRSU-Q_NF-2:0] only contribute when OutFmt==Q_FMT - confirm this is intended
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.S_FMT))) | 
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.D_FMT))) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]&(OutFmt==P.Q_FMT));
+  end
+  
+
+
+  // final sticky bit: the divider's sticky bit (gated by DivOp) ORed with the sticky bits of the normalized fraction
+  //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
+  //assign Sticky = DivSticky&DivOp | NormSticky | StickySubnorm;
+  assign Sticky = DivSticky&DivOp | NormSticky;
+  //assign Sticky = DivSticky&DivOp;
+  
+
+
+
+  // determine guard, round and LSB of the rounded value for the selected format
+  //      - underflow round bit is used to determine the underflow flag
+  if (P.FPSIZES == 1) begin
+      assign FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+      assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+      assign FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+
+  end else if (P.FPSIZES == 2) begin
+      assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF] : Mf[P.NORMSHIFTSZDRSU-P.NF1];
+      assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              P.FMT: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+              end
+              P.FMT1: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+              end
+              P.FMT2: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
+              end
+              default: begin // unused format encoding: drive x
+                  FpGuard = 1'bx;
+                  FpLsbRes = 1'bx;
+                  FpRound = 1'bx;
+              end
+          endcase
+  end else if (P.FPSIZES == 4) begin
+      always_comb
+          case (OutFmt)
+              2'h3: begin // uses the Q_NF bit positions
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
+              end
+              2'h1: begin // uses the D_NF bit positions
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
+              end
+              2'h0: begin // uses the S_NF bit positions
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
+              end
+              2'h2: begin // uses the H_NF bit positions
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
+              end
+          endcase
+  end
+
+  
+  assign Guard =  FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round =  FpRound;
+
+
+  always_comb begin
+      // Determine if you add 1
+      case (Frm)
+          3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+          3'b001: CalcPlus1 = 0;//round to zero
+          3'b010: CalcPlus1 = Ms;//round down
+          3'b011: CalcPlus1 = ~Ms;//round up
+          3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+          default: CalcPlus1 = 1'bx;
+      endcase
+      // Determine if you add 1 (for underflow flag); same decisions taken one bit position lower (Round/Guard in place of Guard/LsbRes)
+      case (Frm)
+          3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+          3'b001: UfCalcPlus1 = 0;//round to zero
+          3'b010: UfCalcPlus1 = Ms;//round down
+          3'b011: UfCalcPlus1 = ~Ms;//round up
+          3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+          default: UfCalcPlus1 = 1'bx;
+      endcase
+  
+  end
+
+  // If an answer is exact (guard, round and sticky all zero) don't round
+  assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format (the lsb position of the selected format's fraction)
+  if (P.FPSIZES == 1) begin
+      assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
+
+  end else if (P.FPSIZES == 2) begin
+      // \/FLEN+1
+      //  | NE+2 |        NF      |
+      //  '-NE+2-^----NF1----^
+      // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+      assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (P.FPSIZES == 3) begin
+      assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
+
+  end else if (P.FPSIZES == 4)      
+      assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction: keep only the top NF bits of Mf
+  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];
+  
+
+
+  // select the exponent (only the divsqrt exponent exists in this unit)
+  assign Me = Ue;
+
+
+
+  // round the result
+  //      - exponent and fraction are added as one word, so a fraction overflow carries into the exponent
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[P.NE-1:0];
+
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..0f808836a
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,45 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtroundsign import cvw::*;  #(parameter cvw_t P) (
+  input  logic Xs,    // sign of operand X
+  input  logic Ys,    // sign of operand Y
+  input  logic Sqrt,  // 1: square root (Y's sign is ignored), 0: divide
+  input  logic DivOp, // 1: a divsqrt operation is being post-processed
+  output logic Ms     // sign handed to the rounding logic
+);
+
+  logic Qs;           // sign of the raw divsqrt result
+
+  // a square root keeps X's sign; a quotient takes the XOR of both operand signs
+  assign Qs = Sqrt ? Xs : (Xs ^ Ys);
+
+  // force the rounding sign to 0 whenever this is not a divsqrt operation
+  assign Ms = DivOp & Qs;
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..a82756e1e
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,94 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtshiftcorrection import cvw::*;  #(parameter cvw_t P) (
+  input logic  [P.NORMSHIFTSZDRSU-1:0] Shifted,                // the shifted divsqrt result before correction
+  // divsqrt
+  input logic                     DivOp,                  // is it a divsqrt operation (not referenced by any active logic in this module)
+  input logic                     DivResSubnorm,          // is the divsqrt result subnormal
+  input logic  [P.NE+1:0]          DivUe,                  // the divsqrt result's exponent
+  input logic                     DivSubnormShiftPos,     // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input logic                     FmaOp,                  // is it an fma opperation
+  //input logic  [P.NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic                     FmaPreResultSubnorm,    // is the result subnormal - calculated before LZA corection
+  //input logic                     FmaSZero,
+  // output
+  //output logic [P.NE+1:0]          FmaMe,                  // exponent of the normalized sum
+  output logic [P.NORMSHIFTSZDRSU-1:0] Mf,                     // the corrected significand (CorrQmShifted, or Shifted unchanged when DivResSubnorm)
+  output logic [P.NE+1:0]          Ue                      // corrected exponent for divider
+);
+  // Purpose: apply the final one- or two-bit left shift to the divsqrt result and adjust its exponent to match
+  logic [P.NORMSHIFTSZDRSU-1:0]    CorrQm0, CorrQm1;           // Shifted moved left by two bits (CorrQm0) or one bit (CorrQm1), zero filled
+  logic [P.NORMSHIFTSZDRSU-1:0]    CorrQmShifted;              // the shifted divsqrt result after the correction shift
+  logic                       ResSubnorm;                 // is the result Subnormal (only referenced by commented-out FMA logic below)
+  logic                       LZAPlus1;                   // msb of Shifted; when 0, one is subtracted from the exponent
+  logic                       LeftShiftQm;                // should the divsqrt result be shifted one to the left
+
+  // the msb of Shifted tells whether the leading one is already in the top position
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // correct the shifting error caused by the LZA
+  //  - the only possible mantissa for a plus two is all zeroes 
+  //      - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+  //mux2 #(P.NORMSHIFTSZDRSU-2) lzacorrmux(Shifted[P.NORMSHIFTSZDRSU-3:0], Shifted[P.NORMSHIFTSZDRSU-2:1], LZAPlus1, CorrSumShifted);
+
+  // correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
+  //    condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
+  //assign LeftShiftQm = ((DivUe==1));
+  assign CorrQm0 = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}};
+  assign CorrQm1 = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};
+  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); // presumably selects CorrQm1 when LeftShiftQm is 1 - confirm against mux2
+  
+  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
+  always_comb
+    //if(FmaOp)                       Mf = {CorrSumShifted, {P.NORMSHIFTSZDRSU-(3*P.NF+4){1'b0}}};
+    //if (DivOp&~DivResSubnorm)  Mf = CorrQmShifted;
+    if (~DivResSubnorm)  Mf = CorrQmShifted;
+    else                       Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
+    
+  // Determine sum's exponent
+  //  main exponent issues: 
+  //      - LZA was one too large
+  //      - LZA was two too large
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  //                          if plus1                    If plus2                               kill if the result Zero or actually subnormal
+  //                          |                           |                                      |
+  //assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
+  
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZDRSU-2]&~Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // exponent is forced to 0 for a subnormal result with a positive shift; otherwise 1 is subtracted when the msb of Shifted is 0
+  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+  //assign Ue = (DivResSubnorm ) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..975c6de3c
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,240 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtspecialcase import cvw::*;  #(parameter cvw_t P) (
+  input  logic                Xs,         // X sign
+  input  logic [P.NF:0]        Xm, Ym, // input significands
+  input  logic                XNaN, YNaN, // are the inputs NaN
+  input  logic [2:0]          Frm,        // rounding mode
+  input  logic [P.FMTBITS-1:0] OutFmt,     // output format
+  input  logic                InfIn,      // are any inputs infinity
+  input  logic                NaNIn,      // are any input NaNs
+  input  logic                XInf, YInf, // are X or Y infinity
+  input  logic                XZero,      // is X zero
+  input  logic                Plus1,      // do you add one for rounding
+  input  logic                Rs,         // the result's sign
+  input  logic                Invalid, Overflow,  // flags to choose the result
+  input  logic [P.NE-1:0]      Re,         // Result exponent
+  input  logic [P.NE+1:0]      FullRe,     // Result full exponent
+  input  logic [P.NF-1:0]      Rf,         // Result fraction
+  // divsqrt
+  input  logic                DivOp,      // is it a divsqrt operation
+  input  logic                DivByZero,  // divide by zero flag
+  // outputs
+  output logic [P.FLEN-1:0]    PostProcRes // final result
+);
+  // Purpose: build every candidate result (NaN, invalid, overflow, underflow, normal) for OutFmt and select one as PostProcRes
+  logic [P.FLEN-1:0]   XNaNRes;    // X is NaN result
+  logic [P.FLEN-1:0]   YNaNRes;    // Y is NaN result
+  logic [P.FLEN-1:0]   InvalidRes; // Invalid operation result
+  logic [P.FLEN-1:0]   UfRes;      // underflowed result
+  logic [P.FLEN-1:0]   OfRes;      // overflowed result
+  logic [P.FLEN-1:0]   NormRes;    // normal result
+  logic               OfResMax;   // does the of result output maximum norm fp number
+  logic               KillRes;    // kill the result for underflow
+  logic               SelOfRes;   // should the overflow result be selected
+
+
+  // does the overflow result output the maximum normalized floating point number
+  //                output infinity if the input is infinity or on divide by zero
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases; narrower formats have their upper FLEN-LENx bits filled with ones
+  if (P.FPSIZES == 1) begin
+      //NaN res selection depending on standard
+      if(P.IEEE754) begin
+          assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+          assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+      end else begin
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+      end
+
+      assign OfRes =  OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = {Rs, Re, Rf};
+
+  end else if (P.FPSIZES == 2) begin
+      if(P.IEEE754) begin
+          assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+          assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+      end else begin 
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+      end
+
+      always_comb
+          if(OutFmt)
+              if(OfResMax)    OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+              else            OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          else
+              if(OfResMax)    OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+              else            OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+      assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              P.FMT: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              P.FMT1: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+                      YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+                  UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+              end
+              P.FMT2: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+                      YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+                  UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
+              end
+              default: begin // unused format encoding: every candidate result is driven to zero
+                  if(P.IEEE754) begin
+                      XNaNRes = (P.FLEN)'(0);
+                      YNaNRes = (P.FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
+                  end else begin 
+                      InvalidRes = (P.FLEN)'(0);
+                  end
+                  OfRes = (P.FLEN)'(0);
+                  UfRes = (P.FLEN)'(0);
+                  NormRes = (P.FLEN)'(0);
+              end
+          endcase
+
+  end else if (P.FPSIZES == 4) begin 
+      always_comb
+          case (OutFmt)
+              2'h3: begin  // full-width layout (P.NE / P.NF)
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              2'h1: begin  // D_* layout
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+                      YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+                  UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
+              end
+              2'h0: begin  // S_* layout
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+                      YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+                  UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
+              end
+              2'h2: begin  // H_* layout
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+                      YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};      
+                // zero is exact if dividing by infinity so don't add 1
+                  UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
+              end
+          endcase
+  end
+
+  // determine if the result should be killed (replaced by UfRes)
+  //      - do so if the top bit of the full rounded exponent FullRe is set, or
+  //      - for a divsqrt operation, if X is zero or Y is infinite while X is not
+  //      - UfRes is a signed zero whose lsb is set when Plus1&Frm[1] (and not DivOp&YInf)
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+  
+  // calculate if the overflow result should be selected
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+  
+  // result priority: NaN inputs, Invalid, overflow/infinity (incl. divide by zero, with result sign), killed result, normal result
+  if(P.IEEE754)
+    always_comb
+      if(XNaN)                    PostProcRes = XNaNRes;
+      else if(YNaN)               PostProcRes = YNaNRes;
+      else if(Invalid)            PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid)           PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+
+endmodule
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..2385cac20
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,102 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module drsu import cvw::*;  #(parameter cvw_t P) (
+  input  logic                clk, 
+  input  logic                reset, 
+  input  logic [P.FMTBITS-1:0] FmtE,
+  input  logic                XsE, YsE,
+  input  logic [P.NF:0]        XmE, YmE,
+  input  logic [P.NE-1:0]      XeE, YeE,
+  input  logic                XInfE, YInfE, 
+  input  logic                XZeroE, YZeroE, 
+  input  logic                XNaNE, YNaNE, 
+  input  logic                XSNaNE, YSNaNE,
+  input  logic                FDivStartE, IDivStartE,
+  input  logic                StallM,
+  input  logic                FlushE,
+  input  logic                SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,
+  input  logic                IntDivE, W64E,
+  input  logic [2:0]          Frm,
+  input  logic [3:0]          OpCtrl,
+  input  logic [1:0]          PostProcSel,
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.FLEN-1:0]    FResM,
+  output logic [P.XLEN-1:0]    FIntDivResultM,
+  output logic [4:0]          FlgM
+);
+
+  // Unified divide / remainder / square-root unit for floating-point and integer operands.
+  // Computes X/Y, sqrt(X), A/B, or A%B by chaining the divremsqrt core into divremsqrtpostprocess.
+
+  // Signals shared between the divremsqrt core and the postprocessor
+  logic                       IntDivM;                      // Integer operation
+  logic                       DivStickyM;                   // wired to postprocessor DivSticky
+  logic [P.NE+1:0]             UeM;                          // wired to postprocessor DivUe
+  logic [P.DIVb:0]             UmM;                          // full-width value from the core
+  logic [P.NF+2:0]             UmMexact;                     // U1.NF+2: top NF+3 bits of UmM, wired to DivUm
+  logic [P.INTDIVb+3:0]        PreResultM;
+  logic [P.XLEN-1:0]           PreIntResultM;
+  logic [P.DIVBLEN-1:0]        IntNormShiftM;
+  // The declarations below are not referenced anywhere else in this module
+  logic [P.DIVb+3:0]           WS, WC, X, D;                 // Partial remainder components, iterator initial value, divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
+  logic [P.DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       Firstun, WZeroE;              // Quotient selection, early termination flag
+  logic                       SpecialCaseM, ISpecialCaseE;  // FP and integer special cases
+  logic                       DivStartE, BZeroM;            // Flop enable during stall, denominator is zero
+  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+
+  // Recurrence core (per the README: preprocessor, iteration units, fsm, integer postprocessing)
+  divremsqrt #(P) divremsqrt(
+    .clk(clk), .reset(reset), .StallM(StallM), .FlushE(FlushE),
+    .FmtE(FmtE), .XsE(XsE), .XmE(XmE), .YmE(YmE), .XeE(XeE), .YeE(YeE),
+    .XInfE(XInfE), .YInfE(YInfE), .XZeroE(XZeroE), .YZeroE(YZeroE), .XNaNE(XNaNE), .YNaNE(YNaNE),
+    .SqrtE(SqrtE), .SqrtM(SqrtM), .FDivStartE(FDivStartE), .IDivStartE(IDivStartE),
+    .IntDivE(IntDivE), .W64E(W64E), .Funct3E(Funct3E), .Funct3M(Funct3M),
+    .ForwardedSrcAE(ForwardedSrcAE), .ForwardedSrcBE(ForwardedSrcBE),
+    .FDivBusyE(FDivBusyE), .IFDivStartE(IFDivStartE), .FDivDoneE(FDivDoneE),
+    .UmM(UmM), .UeM(UeM), .DivStickyM(DivStickyM), .IntDivM(IntDivM),
+    .FIntDivResultM(FIntDivResultM), .IntNormShiftM(IntNormShiftM),
+    .PreIntResultM(PreIntResultM), .PreResultM(PreResultM));
+  assign UmMexact = UmM[P.DIVb -: (P.NF+3)]; // grabbing top 1+(NF+2) msbs
+  // Postprocessor (per the README: handles rounding and flags); produces FResM and FlgM
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(
+    .Fmt(FmtE), .Frm(Frm), .OpCtrl(OpCtrl), .PostProcSel(PostProcSel), .IntDivM(IntDivM),
+    .Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE),
+    .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), .YSNaN(YSNaNE),
+    .DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
+    .PreIntResultM(PreIntResultM), .PreResultM(PreResultM), .IntNormShiftM(IntNormShiftM));
+endmodule
+
diff --git a/src/fpu/divremsqrt/intrightshift.sv b/src/fpu/divremsqrt/intrightshift.sv
new file mode 100644
index 000000000..dd4f47aeb
--- /dev/null
+++ b/src/fpu/divremsqrt/intrightshift.sv
@@ -0,0 +1,37 @@
+///////////////////////////////////////////
+// intrightshift.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Right shifter for divide/square root postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module intrightshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic signed [P.INTDIVb+3:0] shiftin,   // value to shift
+  input  logic        [P.DIVBLEN-1:0] shiftamt,  // number of bit positions to shift by
+  output logic signed [P.INTDIVb+3:0] shifted    // shiftin moved right by shiftamt
+);
+  // '>>' is the logical right shift: vacated msbs are zero-filled even though the ports are signed
+  always_comb shifted = shiftin >> shiftamt;
+endmodule
diff --git a/testbench/common/wallyTracer.sv b/testbench/common/wallyTracer.sv
index 80f7af651..5676f6c13 100644
--- a/testbench/common/wallyTracer.sv
+++ b/testbench/common/wallyTracer.sv
@@ -169,12 +169,17 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi);
 	  CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
 	  CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
 	  CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
-	  CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW;
+	  CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[P.XLEN-1:0];
 	  // user CSRs
 	  CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
 	  CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW;
 	  CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
 	
+    if (P.XLEN == 32) begin
+      CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrsr.MSTATUSH_REGW;
+      CSRArray[12'h31A] = testbench.dut.core.priv.priv.csr.csrm.MENVCFGH_REGW;
+      CSRArray[12'h15D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[63:32];
+    end
 	end else begin // hold the old value if the pipeline is stalled.
 
       // PMP CFG 3A0 to 3AF
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
new file mode 100644
index 000000000..9ca2e5b61
--- /dev/null
+++ b/testbench/testbench-fp.sv
@@ -0,0 +1,1682 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com, james.stine@okstate.edu
+//
+// Purpose: Testbench for UCB Testfloat on Wally
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "config.vh"
+`include "tests-fp.vh"
+
+import cvw::*;
+
+module testbenchfp;
+   // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir
+   // to run specific precisions (e.g., quad or all)
+   parameter TEST="none";
+   parameter TEST_SIZE="none";
+
+  `include "parameter-defs.vh"   
+
+   //parameter MAXVECTORS = 8388610;
+   parameter MAXVECTORS = 100000;
+
+   // FIXME: needs cleaning of unused variables (jes)
+   string                       Tests[];                    // list of tests to be run
+   logic [3:0] 			OpCtrl[];                   // list of op controls
+   logic [2:0] 			Unit[];                     // list of units being tested
+   logic                        WriteInt[];                 // Is being written to integer resgiter
+   logic [2:0] 			Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
+   //logic [2:0] 			Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES
+   logic [1:0] 			Fmt[];                      // list of formats for the other units  
+
+   logic                        clk=0;
+   logic [31:0] 		TestNum=0;                  // index for the test
+   logic [31:0] 		OpCtrlNum=0;                // index for OpCtrl
+   logic [31:0] 		errors=0;                   // how many errors
+   logic [31:0] 		VectorNum=0;                // index for test vector
+   logic [31:0] 		FrmNum=0;                   // index for rounding mode
+   logic [P.Q_LEN*4+7:0] 	TestVectors[MAXVECTORS:0];     // list of test vectors
+
+   logic [1:0] 			FmtVal;                     // value of the current Fmt
+   logic [2:0] 			UnitVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal
+   logic [3:0]          OpCtrlVal;
+   logic                        WriteIntVal;                // value of the current WriteInt
+   logic [P.FLEN-1:0] 		X, Y, Z;                    // inputs read from TestFloat
+   logic [P.FLEN-1:0] 		XPostBox;                   // inputs read from TestFloat
+   logic [P.XLEN-1:0] 		SrcA, SrcB;                       // integer input
+   logic                  W64;                        // is W64 instruction
+   logic [P.FLEN-1:0] 		Ans;                        // correct answer from TestFloat
+   logic [P.FLEN-1:0] 		Res;                        // result from other units
+   logic [4:0] 			AnsFlg;                     // correct flags read from testfloat
+   logic [4:0] 			ResFlg, Flg;                // Result flags
+   logic [P.FMTBITS-1:0] 	ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
+   logic [P.FLEN-1:0] 		FpRes, FpCmpRes;            // Results from each unit
+   logic [P.XLEN-1:0] 		IntRes, CmpRes;             // Results from each unit
+   logic [4:0] 			FmaFlg, CvtFlg, DivFlg;     // Outputed flags
+   logic [4:0] 			CmpFlg;                     // Outputed flags
+   logic                        AnsNaN, ResNaN, NaNGood;
+   logic                        Xs, Ys, Zs;                 // sign of the inputs
+   logic [P.NE-1:0] 		Xe, Ye, Ze;                 // exponent of the inputs
+   logic [P.NF:0] 		Xm, Ym, Zm;                 // mantissas of the inputs
+   logic                        XNaN, YNaN, ZNaN;           // is the input NaN
+   logic                        XSNaN, YSNaN, ZSNaN;        // is the input a signaling NaN
+   logic                        XSubnorm, ZSubnorm;         // is the input denormalized
+   logic                        XInf, YInf, ZInf;           // is the input infinity
+   logic                        XZero, YZero, ZZero;        // is the input zero
+   logic                        XExpMax, YExpMax, ZExpMax;  // is the input's exponent all ones  
+   logic [P.CVTLEN-1:0] 	CvtLzcInE;                  // input to the Leading Zero Counter (priority encoder)
+   logic                        IntZero;
+   logic                        CvtResSgnE;
+   logic [P.NE:0] 		CvtCalcExpE;                // the calculated exponent
+   logic [P.LOGCVTLEN-1:0] 	CvtShiftAmtE;               // how much to shift by
+   logic [P.DIVb:0] 		Quot;
+   logic                        CvtResSubnormUfE;
+   logic                        DivStart=0;
+   logic 			FDivBusyE;
+   logic 			OldFDivBusyE;
+   logic                        reset = 1'b0;
+   logic [$clog2(P.NF+2)-1:0] 	XZeroCnt, YZeroCnt;
+
+   // in-between FMA signals
+   logic                        Mult;
+   logic                        Ss;
+   logic [P.NE+1:0] 		Pe;
+   logic [P.NE+1:0] 		Se;
+   logic 			ASticky;
+   logic 			KillProd; 
+   logic [$clog2(3*P.NF+5)-1:0] SCnt;
+   logic [3*P.NF+3:0] 		Sm;       
+   logic 			InvA;
+   logic 			NegSum;
+   logic 			As;
+   logic 			Ps;
+   logic                        DivSticky;
+   logic                        DivDone;
+   logic                        DivNegSticky;
+   logic [P.NE+1:0] 		DivCalcExp;
+   logic                        divsqrtop;
+
+   // Missing logic vectors fdivsqrt
+   logic [2:0] 			Funct3E;
+   logic [2:0] 			Funct3M;
+   logic 			FlushE;
+   logic 			IFDivStartE;
+   logic      IDivStart;
+   logic 			FDivDoneE;
+   logic [P.NE+1:0] 		UeM;
+   logic [P.DIVb:0] 		UmM;
+   logic [P.XLEN-1:0] 		FIntDivResultM;
+   logic 			ResMatch;                   // Check if result match
+   logic 			FlagMatch;                  // Check if IEEE flags match
+   logic 			CheckNow;                   // Final check
+   logic 			FMAop;                      // Is this a FMA operation?
+   logic      IntDivE;                    // Is Integer operation on FPU?
+
+   // FSM for testing each item per clock
+   typedef enum logic [2:0] {S0, Start, S2, Done} statetype;
+   statetype state, nextstate;   
+   
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     ||||||||| |||||||| ||||||| |||||||||   ||||||| |||||||| |||
+   //        |||    |||      |||        |||      |||     |||      |||
+   //        |||    |||||||| |||||||    |||      ||||||| |||||||| |||
+   //        |||    |||          |||    |||          ||| |||      |||
+   //        |||    |||||||| |||||||    |||      ||||||| |||||||| |||||||||
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // select tests relevant to the specified configuration
+   //    cvtint - test integer conversion unit (fcvtint)
+   //    cvtfp  - test floating-point conversion unit (fcvtfp)
+   //    cmp    - test comparison unit's LT, LE, EQ operations (fcmp)
+   //    add    - test addition
+   //    sub    - test subtraction
+   //    div    - test division
+   //    sqrt   - test square root
+   //    all    - test all of the above (also selects the mul and fma tests)
+   flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M));
+
+   initial begin
+      // Information displayed for user on what is simulating
+      // $display("\nThe start of simulation...");      
+      $display("\nThe start of simulation... INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK);      
+      // $display("This simulation for TEST is %s", TEST);
+      if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
+         if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
+            // add the 128-bit cvtint tests to the to-be-tested list
+            Tests = {Tests, f128rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported add their conversions
+               Tests = {Tests, f128rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+            end
+         end 
+         // if the floating-point conversions are being tested          
+         if (TEST === "cvtfp" | TEST === "all") begin  
+            if (P.D_SUPPORTED) begin // if double precision is supported
+               // add the 128 <-> 64 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f64cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b01, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+            end
+            if (P.F_SUPPORTED) begin // if single precision is supported
+               // add the 128 <-> 32 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f32cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b00, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+            if (P.ZFH_SUPPORTED) begin // if half precision is supported
+               // add the 128 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
+            // add the compare tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the addition tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+            // add the subtraction tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the multiply tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the divide tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
+            Tests = {Tests, f128fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+      end
+      if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported
+         if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
+            Tests = {Tests, f64rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are being supported
+               Tests = {Tests, f64rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+            end
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
+            if (P.F_SUPPORTED) begin // if single precision is supported
+               // add the 64 <-> 32 bit conversions to the to-be-tested list
+               Tests = {Tests, f64f32cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b00, 3'b01};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+            if (P.ZFH_SUPPORTED) begin // if half precision is supported
+               // add the 64 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f64f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b01};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
+            Tests = {Tests, f64fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+      end
+      if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported
+         if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
+            Tests = {Tests, f32rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported
+               Tests = {Tests, f32rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
+            if (P.ZFH_SUPPORTED) begin 
+               // add the 32 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f32f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b00};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
+            Tests = {Tests, f32fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+      end
+      if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported
+         if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested
+            Tests = {Tests, f16rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported
+               Tests = {Tests, f16rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin //  if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end 
+         end
+         if (TEST === "fma" | TEST === "all") begin // if fma is being tested
+            Tests = {Tests, f16fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+      end
+      if (P.IDIV_ON_FPU |1'b1) begin
+        if (P.Q_SUPPORTED) begin
+           if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f128div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b11};
+           end
+         end
+         if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+        end
+        if (P.D_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f64div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b01};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f64sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+        end
+        if (P.S_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f32div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b00};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f32sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+
+        end
+        if (P.ZFH_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f16div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b10};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f16sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+        end
+        if (P.XLEN == 64 & P.IDIV_ON_FPU) begin
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int64rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int64div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int64remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int64divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested
+            Tests = {Tests, int64remw};
+            OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested
+            Tests = {Tests, int64remuw};
+            OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested
+            Tests = {Tests, int64divw};
+            OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested
+            Tests = {Tests, int64divuw};
+            OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
+        // RV32 
+        else if (P.IDIV_ON_FPU) begin 
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int32rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int32div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int32remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int32divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
+      end
+      // check if nothing is being tested
+
+      $display("This simulation for TEST contains %d vectors", Tests.size);      
+      if (Tests.size() == 0) begin
+         $display("TEST %s not supported in this configuration", TEST);
+         $stop;
+      end
+   end
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     ||||||||| |||||||| ||||||||| |||||||     ||||||||| |||||||| ||||||| |||||||||   
+   //     |||   ||| |||      |||   ||| ||   ||        |||    |||      |||        |||      
+   //     ||||||||  |||||||| ||||||||| ||   ||        |||    |||||||| |||||||    |||      
+   //     |||  ||   |||      |||   ||| ||   ||        |||    |||          |||    |||      
+   //     |||   ||| |||||||| |||   ||| |||||||        |||    |||||||| |||||||    |||      
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // Read the first test: build the vector file path and load it into TestVectors
+   initial begin
+      // path prefix prepended to the test-vector file name
+      static string pp = `PATH;
+      string testname;
+      string tt0;
+      // NOTE(review): assumes Tests was already filled by the list-building initial block - confirm ordering
+      // set the test index to 0 before it is used to index Tests
+      TestNum = 0;
+      // $sformatf is the IEEE 1800 standard form of the vendor-specific $psprintf
+      tt0 = $sformatf("%s", Tests[TestNum]);
+      testname = {pp, tt0};
+      $display("\n\nRunning %s vectors ", Tests[TestNum]);
+      $readmemh(testname, TestVectors);
+   end
+
+   // select the control signals for the current test from the per-test lists
+   always_comb UnitVal = Unit[TestNum];           // unit entry, indexed by test number
+   always_comb FmtVal = Fmt[TestNum];             // format entry, indexed by test number
+   always_comb OpCtrlVal = OpCtrl[OpCtrlNum];     // op-control entry, indexed by OpCtrlNum (not TestNum)
+   always_comb WriteIntVal = WriteInt[OpCtrlNum]; // integer-result select, indexed by OpCtrlNum
+   always_comb FrmVal = Frm[FrmNum];              // Frm entry, indexed by FrmNum
+
+   // modify the format signal if only 2 precisions are supported (P.FMTBITS == 1)
+   //    - 1 for the larger precision
+   //    - 0 for the smaller precision
+   always_comb begin
+      if (P.FMTBITS == 1) ModFmt = FmtVal == P.FMT; // 1 when FmtVal equals P.FMT, otherwise 0
+      else ModFmt = FmtVal;                         // otherwise the format code passes through unchanged
+   end
+
+   // extract the inputs (X, Y, Z, SrcA, SrcB) and the expected output (Ans, AnsFlg) from TestVectors[VectorNum]
+   readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
+                                 .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB,
+                                 .Xs, .Ys, .Zs, .Unit(UnitVal),
+                                 .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
+                                 .Xm, .Ym, .Zm, 
+                                 .XNaN, .YNaN, .ZNaN,
+                                 .XSNaN, .YSNaN, .ZSNaN, 
+                                 .XSubnorm, .ZSubnorm, 
+                                 .XZero, .YZero, .ZZero,
+                                 .XInf, .YInf, .ZInf, .XExpMax, .Funct3E, .W64,
+                                 .X, .Y, .Z, .XPostBox); // the unpacked X/Y/Z fields feed the units instantiated below
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     |||||||   |||   ||| ||||||||| 
+   //     |||   ||| |||   |||    |||    
+   //     |||   ||| |||   |||    |||    
+   //     |||   ||| |||   |||    |||         
+   //     |||||||   |||||||||    |||    
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // instantiate devices under test; each unit is only generated for the TEST values that use it
+   if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma // fma unit, also used by the mul/add/sub tests
+      fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
+                   .Xe(Xe), .Ye(Ye), .Ze(Ze), 
+                   .Xm(Xm), .Ym(Ym), .Zm(Zm),
+                   .XZero, .YZero, .ZZero, .Ss, .Se,
+                   .OpCtrl(OpCtrlVal[2:0]), .Sm, .InvA, .SCnt, .As, .Ps,
+                   .ASticky); 
+   end
+   
+   if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt // conversion unit for both cvtfp and cvtint tests
+      fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
+                      .XZero(XZero), .OpCtrl(OpCtrlVal[2:0]), .IntZero,
+                      .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), 
+                      .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
+   end
+
+   if (TEST === "cmp" | TEST === "all") begin: fcmp // compare unit; it drives CmpRes, FpCmpRes and only bit 4 of CmpFlg
+      fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .Xs, .Ys, .Xe, .Ye, 
+                   .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
+                   .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
+   end
+   
+   if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt // FP div/sqrt only: IDivStartE, IntDivE, W64E and the forwarded sources are tied off
+      fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+                             .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+                             .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+                             .XNaNE(XNaN), .YNaNE(YNaN), 
+                             .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                             .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
+                             .UmM(Quot),
+                             .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
+                             .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
+                             .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
+   end
+   if (TEST === "fdivremsqrt" | TEST === "div_drsu" | TEST === "sqrt_drsu" | TEST === "intdivrem" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" ) begin: divremsqrt // drsu tests: drsu itself drives FpRes, IntRes and Flg
+    drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+      .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL),
+      .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]),
+      .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), 
+      .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
+      .StallM(1'b0), .FDivBusyE,
+      .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
+      .Funct3E(Funct3E), .IntDivE(IntDivE), 
+      .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
+  end
+  else begin: postprocess // every other TEST value: the shared postprocessor drives Flg, FpRes and IntRes instead
+    postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal[2:0]), .DivUm(Quot), .DivUe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+  end
+
+   assign CmpFlg[3:0] = 0; // low four compare-flag bits are tied to 0; bit 4 is connected to fcmp's CmpNV
+
+   // produce clock: 10 time-unit period, high for the first 5 units and low for the next 5
+   always begin
+      clk = 1; #5; clk = 0; #5;
+   end
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //          |||||      |||  ||||||||||  |||||      |||
+   //          |||||||    |||  |||    |||  |||||||    |||
+   //          |||| |||   |||  ||||||||||  |||| |||   |||
+   //          ||||  |||  |||  |||    |||  ||||  |||  |||
+   //          ||||   ||| |||  |||    |||  ||||   ||| |||
+   //          ||||    ||||||  |||    |||  ||||    ||||||
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // Check if the expected answer (Ans) and the result (Res) are NaNs: exponent all ones and a non-zero fraction
+   always_comb begin
+      if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
+         // an integer output can't be a NaN
+         AnsNaN = 1'b0;
+         ResNaN = 1'b0;
+      end
+      else if (UnitVal === `CVTFPUNIT) begin // FP-to-FP conversion: the low OpCtrl bits select which format's fields are sliced
+         case (OpCtrlVal[1:0])
+           2'b11: begin // quad (exponent slice uses P.Q_NF, matching every other arm, rather than P.NF)
+              AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
+              ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
+           end
+           2'b01: begin // double                 
+              AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]);
+              ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]);
+           end
+           2'b00: begin // single
+              AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]);
+              ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]);
+           end
+           2'b10: begin // half
+              AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
+              ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
+           end
+         endcase
+      end
+      else begin // all other units: FmtVal selects which format's fields are sliced
+         case (FmtVal)
+           2'b11: begin // quad             
+              AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
+              ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
+           end
+           2'b01: begin // double                 
+              AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]);
+              ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]);
+           end
+           2'b00: begin // single
+              AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]);
+              ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]);
+           end
+           2'b10: begin // half
+              AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
+              ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
+           end
+         endcase
+      end
+   end 
+   
+   always_comb begin
+      // select the result to check (there is no default arm for other UnitVal values)
+      case (UnitVal)
+        `FMAUNIT: Res = FpRes;
+        `DIVUNIT: Res = FpRes;
+        `CMPUNIT: Res = CmpRes;
+        `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; // integer result when WriteIntVal is set
+        `CVTFPUNIT: Res = FpRes;
+        `INTDIVUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; // integer result when WriteIntVal is set
+      endcase
+
+      // select the flag to check: only the compare unit uses its own flag vector
+      case (UnitVal)
+        `FMAUNIT: ResFlg = Flg;
+        `DIVUNIT: ResFlg = Flg;
+        `CMPUNIT: ResFlg = CmpFlg;
+        `CVTINTUNIT: ResFlg = Flg;
+        `CVTFPUNIT: ResFlg = Flg;
+        `INTDIVUNIT: ResFlg = Flg;
+      endcase 
+
+      // Use a four state test sequence (S0 -> Start -> S2 -> Done -> S0) to handle div properly.
+      // Four states should allow other operations to finish properly and within time.
+      // NOTE(review): not every output is assigned in every state (e.g. IntDivE in S2) - confirm the implied hold is intended.
+      case (state)
+        S0: begin
+           DivStart = 1'b0;
+           nextstate = Start;
+        end
+        Start: begin
+           // DIVUNIT, or INTDIVUNIT running a sqrt/div op-control: start a floating-point operation
+           if (UnitVal == `DIVUNIT | (UnitVal == `INTDIVUNIT & (OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL))) begin 
+             DivStart = 1'b1;
+             IntDivE = 1'b0;
+           end
+           else if (UnitVal == `INTDIVUNIT) begin // any other INTDIVUNIT op-control: start an integer operation
+             IDivStart = 1'b1;
+             IntDivE = 1'b1;
+           end
+           else
+             DivStart = 1'b0;	  
+           nextstate = S2;
+        end
+        S2: begin
+           DivStart = 1'b0;	  
+           IDivStart = 1'b0;
+           // divider units wait here while FDivBusyE is high or DivDone is low; other units move straight on
+           if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT | UnitVal == `INTDIVUNIT))
+             nextstate = S2;
+           else
+             nextstate = Done;
+        end
+        Done: begin
+           DivStart = 1'b0;
+           IDivStart = 1'b0;
+           IntDivE = 1'b0;
+           nextstate = S0;
+        end	
+      endcase // case (state)
+      
+   end 
+
+   // Provide reset for divsqrt to reset state (the testbench state register also returns to S0 while reset is high)
+   initial
+     begin
+        #0  reset = 1'b1; // assert reset at time 0
+        #25 reset = 1'b0;     // release reset 25 time units later
+     end   
+
+   // Left-over from before - will remove soon. Still used: DivDone is formed as OldFDivBusyE & ~FDivBusyE.
+   always @(posedge clk) 
+   OldFDivBusyE = FDivDoneE; // NOTE(review): samples FDivDoneE, not FDivBusyE, despite the name - confirm intended
+
+   // state machine to handle timing for testing due to the
+   // various cycle counts for different fp/int operations
+   // Adds vector at start of clock
+   always @(posedge clk) begin
+      // runs on every rising clock edge
+      // state machine element for testing: go to S0 while reset is high, otherwise take nextstate
+      if (reset)
+        state <= S0;
+      else
+        state <= nextstate;      
+
+      // Increment the vector when Done with each test (state is still the pre-edge value, since it is updated nonblocking)
+      if (state == Done)
+        VectorNum += 1; // increment the vector
+      
+   end
+
+   // check results on falling edge of clk
+   always @(negedge clk) begin
+      // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
+      //    - the sign of the NaN does not matter for the opperations being tested
+      //    - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
+      if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
+         case (FmtVal)
+            2'b11: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | 
+                              (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})) |
+                              (ZNaN&(Res[P.Q_LEN-2:0] === {Z[P.Q_LEN-2:P.Q_NF],1'b1,Z[P.Q_NF-2:0]})));
+            2'b01: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | 
+                              (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})) |
+                              (ZNaN&(Res[P.D_LEN-2:0] === {Z[P.D_LEN-2:P.D_NF],1'b1,Z[P.D_NF-2:0]})));
+            2'b00: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | 
+                              (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})) |
+                              (ZNaN&(Res[P.S_LEN-2:0] === {Z[P.S_LEN-2:P.S_NF],1'b1,Z[P.S_NF-2:0]})));
+            2'b10: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
+                              (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) |
+                              (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]})));
+         endcase
+      else if (UnitVal === `CVTFPUNIT) // if converting from FP to FP OpCtrl contains the final FP format
+         case (OpCtrlVal[1:0]) 
+            2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | 
+                              (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | 
+                              (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})));
+            2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.D_LEN-2:0] === Ans[P.D_LEN-2:0])) | 
+                              (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | 
+                              (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})));
+            2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.S_LEN-2:0] === Ans[P.S_LEN-2:0])) | 
+                              (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | 
+                              (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})));
+            2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | 
+                              (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
+                              (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})));
+         endcase
+      else NaNGood = 1'b0; // integers can't be NaNs
+
+         
+      ///////////////////////////////////////////////////////////////////////////////////////////////
+
+      //     ||||||| |||    ||| ||||||| ||||||| |||   |||
+      //     |||     |||    ||| |||     |||     |||  |||
+      //     |||     |||||||||| ||||||| |||     ||||||
+      //     |||     |||    ||| |||     |||     |||  |||
+      //     ||||||| |||    ||| ||||||| ||||||| |||    |||
+
+      ///////////////////////////////////////////////////////////////////////////////////////////////
+
+      // check if result is correct
+      assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
+      assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal ==`INTDIVU_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTREMW_OPCTRL) | (OpCtrlVal == `INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) ; 
+      assign FMAop = (OpCtrlVal == `FMAUNIT);  
+      assign DivDone = OldFDivBusyE & ~FDivBusyE;
+      //assign DivDone =  ~FDivBusyE;
+      //assign DivDone =  FDivDoneE;
+      assign CheckNow = ((DivDone | ~divsqrtop) | 
+                         (TEST == "add" | TEST == "fma" | TEST == "sub") |
+                         ((TEST == "all") & (DivDone | ~divsqrtop)));
+            
+      if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin
+         errors += 1;
+         $display("\nError in %s", Tests[TestNum]);
+         $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
+         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
+         $stop;
+      end
+      
+      if (TestVectors[VectorNum][100:0] === 101'bx & Tests[TestNum] !== "" ) begin // if reached the eof
+         // increment the test
+         TestNum += 1;
+         // clear the vectors
+         for(int i=0; i<MAXVECTORS; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}};
+         // read next files
+         $readmemh({`PATH, Tests[TestNum]}, TestVectors);
+         // set the vector index back to 0
+         VectorNum = 0;
+         // increment the operation once all rounding modes have been tested (or right away when the result is written to an integer register)
+         if (FrmNum === 4 | WriteIntVal == 1'b1) OpCtrlNum += 1;
+         // increment the rounding mode or loop back to rne 
+         if (FrmNum < 4) FrmNum += 1;
+         else begin
+            FrmNum = 0;
+            // Add some time as a buffer between tests at the end of each test
+            // (to be removed)
+            repeat (10)
+              @(posedge clk);
+         end
+         
+         $display("Running %s vectors", Tests[TestNum]);
+      end
+      // if no more Tests - finish
+      if (Tests[TestNum] === "") begin
+              $display("\nAll Tests completed with %d errors\n", errors);
+              $stop;
+      end 
+   end
+endmodule
+
+
+module readvectors import cvw::*; #(parameter cvw_t P) (
+                    input logic 		clk,
+                    input logic [P.Q_LEN*4+7:0] 	TestVector,
+                    input logic [P.FMTBITS-1:0] ModFmt,
+                    input logic [1:0] 		Fmt,
+                    input logic [2:0] 		Unit,
+                    input logic [31:0] 		VectorNum,
+                    input logic [31:0] 		TestNum,
+                    input logic [3:0] 		OpCtrl,
+                    output logic [P.FLEN-1:0] 	Ans,
+                    output logic [P.XLEN-1:0] 	SrcA,
+                    output logic [P.XLEN-1:0] 	SrcB,
+                    output logic [4:0] 		AnsFlg,
+                    output logic 		Xs, Ys, Zs, // sign bits of XYZ
+                    output logic [P.NE-1:0] 	Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
+                    output logic [P.NF:0] 	Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
+                    output logic 		XNaN, YNaN, ZNaN, // is XYZ a NaN
+                    output logic 		XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+                    output logic 		XSubnorm, ZSubnorm, // is XYZ denormalized
+                    output logic 		XZero, YZero, ZZero, // is XYZ zero
+                    output logic 		XInf, YInf, ZInf, // is XYZ infinity
+                    output logic 		XExpMax,
+                    output logic [2:0]          Funct3E,
+                    output logic                W64,
+                    output logic [P.FLEN-1:0] 	X, Y, Z, XPostBox
+                    );
+
+   localparam Q_LEN = 32'd128;
+   
+   logic 					XEn;
+   logic 					YEn;
+   logic 					ZEn;
+   logic 					FPUActive;   
+
+   // apply test vectors on rising edge of clk
+   // Format of vectors Inputs(1/2/3)_AnsFlg
+   always @(posedge clk) begin
+      AnsFlg = TestVector[4:0];
+      case (Unit)
+        `FMAUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = TestVector[8+4*(P.Q_LEN)-1:8+3*(P.Q_LEN)];
+                  Y = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  Z = TestVector[8+2*(P.Q_LEN)-1:8+P.Q_LEN];
+               end
+               else begin
+                  X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  if (OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)]; else Y = {2'b0, {P.Q_NE-1{1'b1}}, (P.Q_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+               end
+               Ans = TestVector[8+(P.Q_LEN-1):8];
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double; upper FLEN-D_LEN bits of every operand are filled with 1s
+               if (OpCtrl === `FMA_OPCTRL) begin // fma: X, Y and Z are all read from the vector
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+4*(P.D_LEN)-1:8+3*(P.D_LEN)]};
+                  Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                  Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+P.D_LEN]};
+               end
+               else begin // non-fma ops: X is always the first operand in the vector (was missing here, unlike quad/single/half)
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]}; else Y = {{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)}; // mul: Y from vector; otherwise Y = 1.0
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}; // mul: Z = +0
+                  else Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]}; // otherwise the second operand goes to Z
+               end
+               Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+4*(P.S_LEN)-1:8+3*(P.S_LEN)]};
+                  Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+P.S_LEN]};
+               end
+               else begin
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]}; 
+                  else Y = {{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]};
+               end
+               Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+            end
+            2'b10: begin // half
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+4*(P.H_LEN)-1:8+3*(P.H_LEN)]};
+                  Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+P.H_LEN]};
+               end
+               else begin
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]}; 
+                  else Y = {{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+               end
+               Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+            end
+          endcase
+        `DIVUNIT:
+          if (OpCtrl[0])
+            case (Fmt)
+              2'b11: begin // quad
+                 X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
+              end
+              2'b01: if (P.D_SUPPORTED) begin // double
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+              end
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              end
+              2'b10: begin // half
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+              end
+            endcase
+          else
+            case (Fmt)
+              2'b11: begin // quad
+                 X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                 Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
+              end
+              2'b01: if (P.D_SUPPORTED) begin // double
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                 Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+              end
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                 Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              end
+              2'b10: begin // half
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                 Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+              end
+            endcase
+         `INTDIVUNIT: begin
+            if (!(OpCtrl === `DIV_OPCTRL | OpCtrl === `SQRT_OPCTRL)) begin
+               SrcA = TestVector[2*(P.Q_LEN)+P.XLEN-1+12:2*(P.Q_LEN)+12]; 
+               SrcB = TestVector[(P.Q_LEN)+P.XLEN-1+12:P.Q_LEN+12];
+               Ans = TestVector[P.XLEN-1+12:12];
+               // no flag checking for intdiv test cases
+               AnsFlg = 5'bx;
+               case (OpCtrl)
+               `INTDIV_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b0;
+               end
+               `INTREM_OPCTRL: begin
+                  Funct3E = 3'b110;
+                  W64 = 1'b0;
+               end
+               `INTREMU_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b0;
+               end
+               `INTDIVU_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b0;
+               end
+               `INTDIVW_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b1;
+               end
+               `INTDIVUW_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b1;
+               end
+               `INTREMW_OPCTRL: begin
+                     Funct3E = 3'b110;
+                     W64 = 1'b1;
+               end
+               `INTREMUW_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b1;
+               end
+               default: begin
+                  Funct3E = 3'b000;
+                  W64 = 1'b0;
+               end
+               endcase
+            end
+            // testing div/sqrt on drsu
+            else begin
+               if (OpCtrl[0])
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+               else
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                     Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                     Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                     Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                     Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+            end
+         end
+        `CMPUNIT:
+          case (Fmt)        
+            2'b11: begin // quad
+               X = TestVector[12+2*(P.Q_LEN)-1:12+(P.Q_LEN)];
+               Y = TestVector[12+(P.Q_LEN)-1:12];
+               Ans = TestVector[8];
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+2*(P.D_LEN)-1:12+(P.D_LEN)]};
+               Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+(P.D_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+2*(P.S_LEN)-1:12+(P.S_LEN)]};
+               Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+(P.S_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+            2'b10: begin // half
+               X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+2*(P.H_LEN)-1:12+(P.H_LEN)]};
+               Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+(P.H_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+          endcase
+        `CVTFPUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {TestVector[8+P.Q_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {TestVector[8+P.Q_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {TestVector[8+P.Q_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {TestVector[8+P.Q_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	begin // double
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b10: begin // half
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	if (P.S_SUPPORTED) begin // single
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+          endcase        
+        `CVTINTUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               // {Int->Fp?, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> quad
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)];
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b10:	begin // int -> quad
+                    // sign-extend the 32-bit integer from bit 31 up to XLEN (applied to signed and unsigned tests alike)
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	begin // quad -> long
+                    X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // quad -> int
+                    X = {TestVector[8+32+P.Q_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               // {Int->Fp?, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> double
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)];
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> double
+                    // sign-extend the 32-bit integer from bit 31 up to XLEN (applied to signed and unsigned tests alike)
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b01:	begin // double -> long
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // double -> int
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               // {Int->Fp?, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> single
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)];
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> single
+                    // sign-extend the 32-bit integer from bit 31 up to XLEN (applied to signed and unsigned tests alike)
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b01:	begin // single -> long
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // single -> int
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b10: begin // half
+               // {Int->Fp?, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> half
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)];
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> half
+                    // sign-extend the 32-bit integer from bit 31 up to XLEN (applied to signed and unsigned tests alike)
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b01:	begin // half -> long
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // half -> int
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+          endcase
+      endcase  
+   end
+
+   assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); // X is disabled only for CVTINTUNIT with OpCtrl[2] set (the int/long -> fp cases, where X is driven to x)
+   assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL)); // Y is disabled for conversions, DIVUNIT ops with OpCtrl[0] set (that branch loads only X), and INTDIVUNIT sqrt
+   assign ZEn = (Unit == `FMAUNIT); // Z is enabled only for the FMA unit
+   assign FPUActive = 1'b1; // unpack is always told the FPU is active
+   
+   unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, // implicit .name connections to the module's own signals
+                      .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
+                      .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
+                      .XEn, .YEn, .ZEn, .XExpMax, .XPostBox); // NOTE(review): module output ZSubnorm is not in this list and has no other driver in this module
+
+endmodule
diff --git a/testbench/testbench.sv b/testbench/testbench.sv
index d214ef3b6..f91bdcc67 100644
--- a/testbench/testbench.sv
+++ b/testbench/testbench.sv
@@ -762,7 +762,7 @@ end
     void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR,            "riscv.ovpworld.org"));
     void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME,              "riscv"));
     void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT,           "RV64GCK"));
-    void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH,     56));
+    void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH,     XLEN==64 ? 56 : 34));
     void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6));
 
     if(elffilename == "buildroot") filename = "";    
@@ -824,15 +824,25 @@ end
     void'(rvviRefCsrSetVolatile(0, 32'hC02));   // INSTRET
     void'(rvviRefCsrSetVolatile(0, 32'hB02));   // MINSTRET
     void'(rvviRefCsrSetVolatile(0, 32'hC01));   // TIME
-    
+    if (P.XLEN == 32) begin
+      void'(rvviRefCsrSetVolatile(0, 32'hC80));   // CYCLEH
+      void'(rvviRefCsrSetVolatile(0, 32'hB80));   // MCYCLEH
+      void'(rvviRefCsrSetVolatile(0, 32'hC82));   // INSTRETH
+      void'(rvviRefCsrSetVolatile(0, 32'hB82));   // MINSTRETH
+      void'(rvviRefCsrSetVolatile(0, 32'hC81));   // TIMEH 
+    end
     // User HPMCOUNTER3 - HPMCOUNTER31
     for (iter='hC03; iter<='hC1F; iter++) begin
       void'(rvviRefCsrSetVolatile(0, iter));   // HPMCOUNTERx
+      if (P.XLEN == 32) 
+        void'(rvviRefCsrSetVolatile(0, iter+128));   // HPMCOUNTERxH
     end       
     
     // Machine MHPMCOUNTER3 - MHPMCOUNTER31
     for (iter='hB03; iter<='hB1F; iter++) begin
       void'(rvviRefCsrSetVolatile(0, iter));   // MHPMCOUNTERx
+      if (P.XLEN == 32) 
+        void'(rvviRefCsrSetVolatile(0, iter+128));   // MHPMCOUNTERxH
     end       
     
     // cannot predict this register due to latency between
diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv
index 61fa12fcc..1617d392c 100644
--- a/testbench/testbench_fp.sv
+++ b/testbench/testbench_fp.sv
@@ -23,26 +23,28 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 `include "config.vh"
-`include "tests_fp.vh"
+`include "tests-fp.vh"
 
 import cvw::*;
 
 module testbench_fp;
    // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir
    // to run specific precisions (e.g., quad or all)
-   parameter string TEST="none"; // choices are cvtint, cvtfp, cmp, add, sub, mul, div, sqrt, fma; all does not check properly
-   parameter string TEST_SIZE="all";
+   parameter TEST="none";
+   parameter TEST_SIZE="none";
 
   `include "parameter-defs.vh"   
 
-   parameter MAXVECTORS = 8388610;
+   //parameter MAXVECTORS = 8388610;
+   parameter MAXVECTORS = 100000;
 
    // FIXME: needs cleaning of unused variables (jes)
    string                       Tests[];                    // list of tests to be run
-   logic [2:0] 			OpCtrl[];                   // list of op controls
+   logic [3:0] 			OpCtrl[];                   // list of op controls
    logic [2:0] 			Unit[];                     // list of units being tested
    logic                        WriteInt[];                 // Is being written to integer resgiter
    logic [2:0] 			Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
+   //logic [2:0] 			Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES
    logic [1:0] 			Fmt[];                      // list of formats for the other units  
 
    logic                        clk=0;
@@ -51,22 +53,23 @@ module testbench_fp;
    logic [31:0] 		errors=0;                   // how many errors
    logic [31:0] 		VectorNum=0;                // index for test vector
    logic [31:0] 		FrmNum=0;                   // index for rounding mode
-   logic [P.Q_LEN*4+7:0] 	TestVectors[MAXVECTORS-1:0];     // list of test vectors
+   logic [P.Q_LEN*4+7:0] 	TestVectors[MAXVECTORS:0];     // list of test vectors
 
    logic [1:0] 			FmtVal;                     // value of the current Fmt
-   logic [2:0] 			UnitVal, OpCtrlVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal
+   logic [2:0] 			UnitVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal
+   logic [3:0]          OpCtrlVal;
    logic                        WriteIntVal;                // value of the current WriteInt
-   logic [P.Q_LEN-1:0] 		X, Y, Z;                    // inputs read from TestFloat
+   logic [P.FLEN-1:0] 		X, Y, Z;                    // inputs read from TestFloat
    logic [P.FLEN-1:0] 		XPostBox;                   // inputs read from TestFloat
-   logic [P.XLEN-1:0] 		SrcA;                       // integer input
-   logic [P.Q_LEN-1:0] 		Ans;                        // correct answer from TestFloat
-   logic [P.Q_LEN-1:0] 		Res;                        // result from other units
+   logic [P.XLEN-1:0] 		SrcA, SrcB;                       // integer input
+   logic                  W64;                        // is W64 instruction
+   logic [P.FLEN-1:0] 		Ans;                        // correct answer from TestFloat
+   logic [P.FLEN-1:0] 		Res;                        // result from other units
    logic [4:0] 			AnsFlg;                     // correct flags read from testfloat
    logic [4:0] 			ResFlg, Flg;                // Result flags
    logic [P.FMTBITS-1:0] 	ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
    logic [P.FLEN-1:0] 		FpRes, FpCmpRes;            // Results from each unit
    logic [P.XLEN-1:0] 		IntRes, CmpRes;             // Results from each unit
-   logic [P.Q_LEN-1:0]     FpResExtended;              // FpRes extended to same length as Ans/Res
    logic [4:0] 			FmaFlg, CvtFlg, DivFlg;     // Outputed flags
    logic [4:0] 			CmpFlg;                     // Outputed flags
    logic                        AnsNaN, ResNaN, NaNGood;
@@ -99,8 +102,8 @@ module testbench_fp;
    logic [P.NE+1:0] 		Se;
    logic 			ASticky;
    logic 			KillProd; 
-   logic [$clog2(P.FMALEN+1)-1:0] SCnt;
-   logic [P.FMALEN-1:0] 		Sm;       
+   logic [$clog2(3*P.NF+5)-1:0] SCnt;
+   logic [3*P.NF+3:0] 		Sm;       
    logic 			InvA;
    logic 			NegSum;
    logic 			As;
@@ -116,6 +119,7 @@ module testbench_fp;
    logic [2:0] 			Funct3M;
    logic 			FlushE;
    logic 			IFDivStartE;
+   logic      IDivStart;
    logic 			FDivDoneE;
    logic [P.NE+1:0] 		UeM;
    logic [P.DIVb:0] 		UmM;
@@ -124,9 +128,7 @@ module testbench_fp;
    logic 			FlagMatch;                  // Check if IEEE flags match
    logic 			CheckNow;                   // Final check
    logic 			FMAop;                      // Is this a FMA operation?
-
-   logic [P.NE-2:0]             BiasE;                              // Bias of exponent
-   logic [P.LOGFLEN-1:0]        NfE;                                // Number of fractional bits
+   logic      IntDivE;                    // Is Integer operation on FPU?
 
    // FSM for testing each item per clock
    typedef enum logic [2:0] {S0, Start, S2, Done} statetype;
@@ -150,16 +152,16 @@ module testbench_fp;
    //    sub    - test subtraction
    //    div    - test division
    //    sqrt   - test square root
-   //    all    - test all of the above < doesn't report errors properly >
-   
+   //    all    - test all of the above
+   flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M));
+
    initial begin
       // Information displayed for user on what is simulating
       // $display("\nThe start of simulation...");      
+      $display("\nThe start of simulation... INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK);      
       // $display("This simulation for TEST is %s", TEST);
-      // $display("This simulation for TEST is of the operand size of %s", TEST_SIZE);      
-
-   if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
-	 if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
+      if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
+         if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
             // add the 128-bit cvtint tests to the to-be-tested list
             Tests = {Tests, f128rv32cvtint};
             // add the op-codes for these tests to the op-code list
@@ -177,13 +179,13 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b11};
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b11};
                end
             end
-	 end 
-	 // if the floating-point conversions are being tested          
-	 if (TEST === "cvtfp" | TEST === "all") begin  
+         end 
+         // if the floating-point conversions are being tested          
+         if (TEST === "cvtfp" | TEST === "all") begin  
             if (P.D_SUPPORTED) begin // if double precision is supported
                // add the 128 <-> 64 bit conversions to the to-be-tested list
                Tests = {Tests, f128f64cvt};
@@ -192,12 +194,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
                end
             end
             if (P.F_SUPPORTED) begin // if single precision is supported
@@ -208,12 +210,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
                end
             end
             if (P.ZFH_SUPPORTED) begin // if half precision is supported
@@ -224,16 +226,16 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
+         end
+         if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
             // add the compare tests/op-ctrls/unit/fmt
             Tests = {Tests, f128cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -242,8 +244,8 @@ module testbench_fp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the addition tests/op-ctrls/unit/fmt
             Tests = {Tests, f128add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -252,8 +254,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
             // add the subtraction tests/op-ctrls/unit/fmt
             Tests = {Tests, f128sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -262,8 +264,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the multiply tests/op-ctrls/unit/fmt
             Tests = {Tests, f128mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -272,8 +274,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the divide tests/op-ctrls/unit/fmt
             Tests = {Tests, f128div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -282,8 +284,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
             // add the square-root tests/op-ctrls/unit/fmt
             Tests = {Tests, f128sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -292,8 +294,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
+         end
+         if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
             Tests = {Tests, f128fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -301,10 +303,10 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
+         end
       end
       if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported
-	 if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
+         if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
             Tests = {Tests, f64rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -321,12 +323,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b01};
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b01};
                end
             end
-	 end
-	 if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
             if (P.F_SUPPORTED) begin // if single precision is supported
                // add the 64 <-> 32 bit conversions to the to-be-tested list
                Tests = {Tests, f64f32cvt};
@@ -335,12 +337,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
                end
             end
             if (P.ZFH_SUPPORTED) begin // if half precision is supported
@@ -351,16 +353,16 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -369,8 +371,8 @@ module testbench_fp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -379,8 +381,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -389,8 +391,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -399,8 +401,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -409,8 +411,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -419,8 +421,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
+         end
+         if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
             Tests = {Tests, f64fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -428,10 +430,10 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
+         end
       end
       if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported
-	 if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
+         if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
             Tests = {Tests, f32rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -448,12 +450,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b00};
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b00};
                end
             end
-	 end
-	 if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
             if (P.ZFH_SUPPORTED) begin 
                // add the 32 <-> 16 bit conversions to the to-be-tested list
                Tests = {Tests, f32f16cvt};
@@ -462,16 +464,16 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -480,8 +482,8 @@ module testbench_fp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -490,8 +492,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -500,8 +502,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -510,8 +512,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -520,8 +522,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -530,8 +532,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
+         end
+         if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
             Tests = {Tests, f32fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -539,10 +541,10 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
+         end
       end
       if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported
-	 if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested
+         if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested
             Tests = {Tests, f16rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -559,12 +561,12 @@ module testbench_fp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b10};
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -573,8 +575,8 @@ module testbench_fp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin //  if addition is being tested
+         end
+         if (TEST === "add" | TEST === "all") begin //  if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -583,8 +585,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -593,8 +595,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -603,8 +605,8 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -613,8 +615,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -623,8 +625,8 @@ module testbench_fp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b10};
             end 
-	 end
-	 if (TEST === "fma" | TEST === "all") begin // if fma is being tested
+         end
+         if (TEST === "fma" | TEST === "all") begin // if fma is being tested
             Tests = {Tests, f16fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -632,12 +634,194 @@ module testbench_fp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
+         end
+      end
+      if (P.IDIV_ON_FPU |1'b1) begin
+        if (P.Q_SUPPORTED) begin
+           if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f128div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b11};
+           end
+         end
+         if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+        end
+        if (P.D_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f64div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b01};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f64sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+        end
+        if (P.F_SUPPORTED) begin // single precision is gated by F_SUPPORTED, matching the SP section above
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+            // add the f32 divide tests/op-ctrls/unit/fmt (Fmt 2'b00 = single)
+            Tests = {Tests, f32div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the f32 square-root tests/op-ctrls/unit/fmt (Fmt 2'b00 = single)
+            Tests = {Tests, f32sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+          end
+
+        end
+        if (P.ZFH_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f16div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b10};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f16sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+        end
+        if (P.XLEN == 64 & P.IDIV_ON_FPU) begin
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int64rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int64div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int64remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int64divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested
+            Tests = {Tests, int64remw};
+            OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested
+            Tests = {Tests, int64remuw};
+            OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested
+            Tests = {Tests, int64divw};
+            OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested
+            Tests = {Tests, int64divuw};
+            OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
+        // RV32 
+        else if (P.IDIV_ON_FPU) begin 
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int32rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int32div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int32remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int32divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
       end
       // check if nothing is being tested
+
+      $display("This simulation for TEST contains %d vectors", Tests.size);      
       if (Tests.size() == 0) begin
-	 $display("TEST %s not supported in this configuration", TEST);
-	 $stop;
+         $display("TEST %s not supported in this configuration", TEST);
+         $stop;
       end
    end
 
@@ -657,18 +841,17 @@ module testbench_fp;
       static string pp = `PATH;
       string testname;
       string tt0;
-      tt0 = $sformatf("%s", Tests[TestNum]);
+      tt0 = $psprintf("%s", Tests[TestNum]);
       testname = {pp, tt0};
       //$display("Here you are %s", testname);     
-      // clear the vectors
-      for(int i=0; i<MAXVECTORS; i++) TestVectors[i] = '1; 
-       $display("\n\nRunning %s vectors ", Tests[TestNum]);
+      $display("\n\nRunning %s vectors ", Tests[TestNum]);
       $readmemh(testname, TestVectors);
+
       // set the test index to 0
       TestNum = 0;
    end
 
-   // set the signals for all tests
+ // set the signals for all tests
    always_ff @(posedge clk) begin
       UnitVal = Unit[TestNum];
       FmtVal = Fmt[TestNum];
@@ -676,18 +859,26 @@ module testbench_fp;
       WriteIntVal = WriteInt[OpCtrlNum];
       FrmVal = Frm[FrmNum];
    end
+   /*
+   // set the signals for all tests
+   always_comb UnitVal = Unit[TestNum];
+   always_comb FmtVal = Fmt[TestNum];
+   always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
+   always_comb WriteIntVal = WriteInt[OpCtrlNum];
+   always_comb FrmVal = Frm[FrmNum];
+   */
 
-   // modify the format signal if only 2 precisions supported
+   // modify the format signal if only 2 precisions supported
    //    - 1 for the larger precision
    //    - 0 for the smaller precision
    always_comb begin
-      if (P.FMTBITS == 1) ModFmt = {1'b0, FmtVal == P.FMT};
+      if (P.FMTBITS == 1) ModFmt = FmtVal == P.FMT;
       else ModFmt = FmtVal;
    end
 
    // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
    readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
-                                 .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
+                                 .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB,
                                  .Xs, .Ys, .Zs, .Unit(UnitVal),
                                  .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
                                  .Xm, .Ym, .Zm, 
@@ -695,8 +886,8 @@ module testbench_fp;
                                  .XSNaN, .YSNaN, .ZSNaN, 
                                  .XSubnorm, .ZSubnorm, 
                                  .XZero, .YZero, .ZZero,
-                                 .XInf, .YInf, .ZInf, .XExpMax,
-                                 .X, .Y, .Z, .XPostBox, .NfE, .BiasE);
+                                 .XInf, .YInf, .ZInf, .XExpMax, .Funct3E, .W64,
+                                 .X, .Y, .Z, .XPostBox);
 
    ///////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -711,49 +902,61 @@ module testbench_fp;
    // instantiate devices under test
    if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma
       fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
-		   .Xe(Xe), .Ye(Ye), .Ze(Ze), 
-		   .Xm(Xm), .Ym(Ym), .Zm(Zm),
-		   .XZero, .YZero, .ZZero, .Ss, .Se,
-		   .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps,
-		   .ASticky); 
+                   .Xe(Xe), .Ye(Ye), .Ze(Ze), 
+                   .Xm(Xm), .Ym(Ym), .Zm(Zm),
+                   .XZero, .YZero, .ZZero, .Ss, .Se,
+                   .OpCtrl(OpCtrlVal[2:0]), .Sm, .InvA, .SCnt, .As, .Ps,
+                   .ASticky); 
    end
    
-   postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
-				.OpCtrl(OpCtrlVal), .DivUm(Quot), .DivUe(DivCalcExp),
-				.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
-				.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
-				.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
-				.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
-				.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-				.FmaASticky(ASticky), .FmaSe(Se),
-				.FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-				.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes), .Zfa(1'b0));
-   
    if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
       fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
-		      .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero,
-		      .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), 
-		      .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
+                      .XZero(XZero), .OpCtrl(OpCtrlVal[2:0]), .IntZero,
+                      .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), 
+                      .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
    end
 
    if (TEST === "cmp" | TEST === "all") begin: fcmp
-      fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Zfa(1'b0), .Xs, .Ys, .Xe, .Ye, 
+      fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .Xs, .Ys, .Xe, .Ye, 
                    .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
-                   .XNaN, .YNaN, .XSNaN, .YSNaN, .X(X[P.FLEN-1:0]), .Y(Y[P.FLEN-1:0]), .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
+                   .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
    end
    
    if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-			     .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
-			     .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
-			     .XNaNE(XNaN), .YNaNE(YNaN), .NfE, .BiasE,
-			     .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
-			     .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
-			     .UmM(Quot),
-			     .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
-			     .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
-			     .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
+                             .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+                             .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+                             .XNaNE(XNaN), .YNaNE(YNaN), 
+                             .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                             .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
+                             .UmM(Quot),
+                             .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
+                             .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
+                             .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
+   if (TEST === "fdivremsqrt" | TEST === "div_drsu" | TEST === "sqrt_drsu" | TEST === "intdivrem" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" ) begin: divremsqrt
+    drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+      .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL),
+      .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]),
+      .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), 
+      .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
+      .StallM(1'b0), .FDivBusyE,
+      .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
+      .Funct3E(Funct3E), .IntDivE(IntDivE), 
+      .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
+  end
+  else begin: postprocess
+    postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal[2:0]), .DivUm(Quot), .DivUe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+  end
 
    assign CmpFlg[3:0] = 0;
 
@@ -781,7 +984,7 @@ module testbench_fp;
          ResNaN = 1'b0;
       end
       else if (UnitVal === `CVTFPUNIT) begin
-	 case (OpCtrlVal[1:0])
+         case (OpCtrlVal[1:0])
            2'b11: begin // quad             
               AnsNaN = &Ans[P.Q_LEN-2:P.NF]&(|Ans[P.Q_NF-1:0]);
               ResNaN = &Res[P.Q_LEN-2:P.NF]&(|Res[P.Q_NF-1:0]);
@@ -798,10 +1001,10 @@ module testbench_fp;
               AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
               ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
            end
-	 endcase
+         endcase
       end
       else begin
-	 case (FmtVal)
+         case (FmtVal)
            2'b11: begin // quad             
               AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
               ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
@@ -818,60 +1021,72 @@ module testbench_fp;
               AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
               ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
            end
-	 endcase
+         endcase
       end
    end 
    
    always_comb begin
-      FpResExtended = {{(P.Q_LEN-P.FLEN){1'b1}}, FpRes};
       // select the result to check
       case (UnitVal)
-	`FMAUNIT: Res = FpResExtended;
-	`DIVUNIT: Res = FpResExtended;
-	`CMPUNIT: Res = {{(Q_LEN-XLEN){1'b0}}, CmpRes};
-	`CVTINTUNIT: if (WriteIntVal) Res = {{(Q_LEN-XLEN){1'b0}}, IntRes}; else Res = FpResExtended;
-	`CVTFPUNIT: Res = FpResExtended;
+        `FMAUNIT: Res = FpRes;
+        `DIVUNIT: Res = FpRes;
+        `CMPUNIT: Res = CmpRes;
+        `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
+        `CVTFPUNIT: Res = FpRes;
+        `INTDIVUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
       endcase
 
       // select the flag to check
       case (UnitVal)
-	`FMAUNIT: ResFlg = Flg;
-	`DIVUNIT: ResFlg = Flg;
-	`CMPUNIT: ResFlg = CmpFlg;
-	`CVTINTUNIT: ResFlg = Flg;
-	`CVTFPUNIT: ResFlg = Flg;
+        `FMAUNIT: ResFlg = Flg;
+        `DIVUNIT: ResFlg = Flg;
+        `CMPUNIT: ResFlg = CmpFlg;
+        `CVTINTUNIT: ResFlg = Flg;
+        `CVTFPUNIT: ResFlg = Flg;
+        `INTDIVUNIT: ResFlg = Flg;
       endcase 
 
       // Use four state test sequence to handle div properly.
       // Four states should allow other operations to finish
       // properly and within time.
       case (state)
-	S0: begin
-	   DivStart = 1'b0;
-	   nextstate = Start;
-	end
-	Start: begin
-	   if (UnitVal == `DIVUNIT)	  
-	     DivStart = 1'b1;
-	   else
-	     DivStart = 1'b0;	  
-	   nextstate = S2;
-	end
-	S2: begin
-	   DivStart = 1'b0;	  
-	   if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT))
-	     nextstate = S2;
-	   else
-	     nextstate = Done;
-	end
-	Done: begin
-	   DivStart = 1'b0;
-	   nextstate = S0;
-	end	
-   default: begin 
-      DivStart = 1'b0;
-      nextstate = S0;
-   end
+        S0: begin
+           DivStart = 1'b0;
+           nextstate = Start;
+        end
+        Start: begin
+           if (UnitVal == `DIVUNIT | (UnitVal == `INTDIVUNIT & (OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL))) begin 
+             DivStart = 1'b1;
+             IntDivE = 1'b0;
+           end
+           else if (UnitVal == `INTDIVUNIT) begin
+             IDivStart = 1'b1;
+             IntDivE = 1'b1;
+           end
+           else
+             DivStart = 1'b0;	  
+           nextstate = S2;
+        end
+        S2: begin
+           DivStart = 1'b0;	  
+           IDivStart = 1'b0;
+           if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT | UnitVal == `INTDIVUNIT))
+             nextstate = S2;
+           else
+             nextstate = Done;
+        end
+        Done: begin
+           DivStart = 1'b0;
+           IDivStart = 1'b0;
+           IntDivE = 1'b0;
+           nextstate = S0;
+        end	
+        default: begin
+           DivStart = 1'b0;
+           IDivStart = 1'b0;
+           IntDivE = 1'b0;
+           nextstate = S0;
+        end
       endcase // case (state)
       
    end 
@@ -879,13 +1094,13 @@ module testbench_fp;
    // Provide reset for divsqrt to reset state
    initial
      begin
-	#0  reset = 1'b1;
-	#25 reset = 1'b0;     
+        #0  reset = 1'b1;
+        #25 reset = 1'b0;     
      end   
 
    // Left-over from before - will remove soon
    always @(posedge clk) 
-     OldFDivBusyE = FDivDoneE;
+   OldFDivBusyE = FDivDoneE;
 
    // state machine to handle timing for testing due
    // various cycle counts for different fp/int operations
@@ -896,12 +1111,11 @@ module testbench_fp;
       if (reset)
         state <= S0;
       else
-	      state <= nextstate;      
+        state <= nextstate;      
 
       // Increment the vector when Done with each test
-      if (state == Done) begin
-	      VectorNum += 1; // increment the vector
-      end
+      if (state == Done)
+        VectorNum += 1; // increment the vector
       
    end
 
@@ -970,83 +1184,83 @@ module testbench_fp;
       ///////////////////////////////////////////////////////////////////////////////////////////////
 
       // check if result is correct
-      assign ResMatch = ((Res[P.FLEN-1:0] === Ans[P.FLEN-1:0]) | NaNGood | (NaNGood === 1'bx));
+      assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
       assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
-      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL);
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal ==`INTDIVU_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTREMW_OPCTRL) | (OpCtrlVal == `INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) ; 
       assign FMAop = (OpCtrlVal == `FMAUNIT);  
       assign DivDone = OldFDivBusyE & ~FDivBusyE;
+      //assign DivDone =  ~FDivBusyE;
+      //assign DivDone =  FDivDoneE;
       assign CheckNow = ((DivDone | ~divsqrtop) | 
-			 (TEST == "add" | TEST == "fma" | TEST == "sub") |
-			 ((TEST == "all") & (DivDone | ~divsqrtop)));
+                         (TEST == "add" | TEST == "fma" | TEST == "sub") |
+                         ((TEST == "all") & (DivDone | ~divsqrtop)));
             
       if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin
          errors += 1;
          $display("\nError in %s", Tests[TestNum]);
-         $display("TestNum %d VectorNum %d OpCtrl %d", TestNum, VectorNum, OpCtrl[TestNum]);	 
-         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", 
-            X[P.FLEN-1:0], Y[P.FLEN-1:0], Z[P.FLEN-1:0], SrcA, Res[P.FLEN-1:0], ResFlg, Ans[P.FLEN-1:0], AnsFlg);
-         //$display("  fma.Xs %h Xe %h Xm %h Ys %h Ye %h Ym %h Ss %h Se %h Sm %h", fma.Xs, fma.Xe, fma.Xm, fma.Ys, fma.Ye, fma.Ym, fma.Ss, fma.Se, fma.Sm);
-         //$display("  readvectors.unpack.X %h Xs %h Xe %h Xm %h", readvectors.unpack.X, readvectors.unpack.Xs, readvectors.unpack.Xe, readvectors.unpack.Xm);
+         $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
+         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
          $stop;
       end
-
-      if (TestVectors[VectorNum] == '1 & Tests[TestNum] !== "") begin // if reached the eof
+      
+      if (TestVectors[VectorNum][100:0] === 101'bx & Tests[TestNum] !== "" ) begin // if reached the eof
          // increment the test
          TestNum += 1;
          // clear the vectors
-         for(int i=0; i<MAXVECTORS; i++) TestVectors[i] = '1;
+         for(int i=0; i<MAXVECTORS; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}};
          // read next files
          $readmemh({`PATH, Tests[TestNum]}, TestVectors);
          // set the vector index back to 0
          VectorNum = 0;
          // incemet the operation if all the rounding modes have been tested
-         if (FrmNum === 4) OpCtrlNum += 1;
+         if (FrmNum === 4 | WriteIntVal == 1'b1) OpCtrlNum += 1;
          // increment the rounding mode or loop back to rne 
          if (FrmNum < 4) FrmNum += 1;
          else begin
-	         FrmNum = 0;
+            FrmNum = 0;
             // Add some time as a buffer between tests at the end of each test
-            // (to be removed, but as of 7/14/24 breaks Verilator sqrt sim to remove dh)
+            // (to be removed)
             repeat (10)
-                  @(posedge clk);
-	      end
-         // if no more Tests - finish
-         if (Tests[TestNum] === "") begin
-                  $display("\nAll Tests completed with %d errors\n", errors);
-                  $stop;
-         end 
+              @(posedge clk);
+         end
+         
          $display("Running %s vectors", Tests[TestNum]);
       end
+      // if no more Tests - finish
+      if (Tests[TestNum] === "") begin
+              $display("\nAll Tests completed with %d errors\n", errors);
+              $stop;
+      end 
    end
 endmodule
 
 
 module readvectors import cvw::*; #(parameter cvw_t P) (
-		    input logic 		clk,
-		    input logic [P.Q_LEN*4+7:0] 	TestVector,
-		    input logic [P.FMTBITS-1:0] ModFmt,
-		    input logic [1:0] 		Fmt,
-		    input logic [2:0] 		Unit,
-		    input logic [31:0] 		VectorNum,
-		    input logic [31:0] 		TestNum,
-		    input logic [2:0] 		OpCtrl,
-   	    output logic [P.Q_LEN-1:0] 	Ans,
-		    output logic [P.XLEN-1:0] 	SrcA,
-		    output logic [4:0] 		AnsFlg,
-		    output logic 		Xs, Ys, Zs, // sign bits of XYZ
-		    output logic [P.NE-1:0] 	Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
-		    output logic [P.NF:0] 	Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
-		    output logic 		XNaN, YNaN, ZNaN, // is XYZ a NaN
-		    output logic 		XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
-		    output logic 		XSubnorm, ZSubnorm, // is XYZ denormalized
-		    output logic 		XZero, YZero, ZZero, // is XYZ zero
-		    output logic 		XInf, YInf, ZInf, // is XYZ infinity
-		    output logic 		XExpMax,
-		    output logic [P.Q_LEN-1:0] 	X, Y, Z,
-          output logic [P.FLEN-1:0]  XPostBox,
-	       output logic [P.NE-2:0] BiasE,                              // Bias of exponent
-          output logic [P.LOGFLEN-1:0] NfE                           // Number of fractional bits
-		    );
+                    input logic 		clk,
+                    input logic [P.Q_LEN*4+7:0] 	TestVector,
+                    input logic [P.FMTBITS-1:0] ModFmt,
+                    input logic [1:0] 		Fmt,
+                    input logic [2:0] 		Unit,
+                    input logic [31:0] 		VectorNum,
+                    input logic [31:0] 		TestNum,
+                    input logic [3:0] 		OpCtrl,
+                    output logic [P.FLEN-1:0] 	Ans,
+                    output logic [P.XLEN-1:0] 	SrcA,
+                    output logic [P.XLEN-1:0] 	SrcB,
+                    output logic [4:0] 		AnsFlg,
+                    output logic 		Xs, Ys, Zs, // sign bits of XYZ
+                    output logic [P.NE-1:0] 	Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
+                    output logic [P.NF:0] 	Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
+                    output logic 		XNaN, YNaN, ZNaN, // is XYZ a NaN
+                    output logic 		XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+                    output logic 		XSubnorm, ZSubnorm, // is XYZ denormalized
+                    output logic 		XZero, YZero, ZZero, // is XYZ zero
+                    output logic 		XInf, YInf, ZInf, // is XYZ infinity
+                    output logic 		XExpMax,
+                    output logic [2:0]          Funct3E,
+                    output logic                W64,
+                    output logic [P.FLEN-1:0] 	X, Y, Z, XPostBox
+                    );
 
    localparam Q_LEN = 32'd128;
    
@@ -1059,322 +1273,413 @@ module readvectors import cvw::*; #(parameter cvw_t P) (
    // Format of vectors Inputs(1/2/3)_AnsFlg
    always @(posedge clk) begin
       AnsFlg = TestVector[4:0];
-      //$display("  Entering readvectors with VectorNum=%d, TestVector=%x, Unit=%d, Fmt=%d, OpCtrl=%d", VectorNum, TestVector, Unit, Fmt, OpCtrl); */
       case (Unit)
-	`FMAUNIT:
+        `FMAUNIT:
           case (Fmt)
-            2'b11: if (P.Q_SUPPORTED) begin // quad
+            2'b11: begin // quad
                if (OpCtrl === `FMA_OPCTRL) begin
-		  X = TestVector[8+4*(P.Q_LEN)-1:8+3*(P.Q_LEN)];
-		  Y = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
-		  Z = TestVector[8+2*(P.Q_LEN)-1:8+P.Q_LEN];
+                  X = TestVector[8+4*(P.Q_LEN)-1:8+3*(P.Q_LEN)];
+                  Y = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  Z = TestVector[8+2*(P.Q_LEN)-1:8+P.Q_LEN];
                end
                else begin
-		  X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
-		  if (OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)]; else Y = {2'b0, {P.Q_NE-1{1'b1}}, (P.Q_NF)'(0)};
-		  if (OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                  X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  if (OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)]; else Y = {2'b0, {P.Q_NE-1{1'b1}}, (P.Q_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
                end
                Ans = TestVector[8+(P.Q_LEN-1):8];
             end
             2'b01: if (P.D_SUPPORTED) begin // double
                if (OpCtrl === `FMA_OPCTRL) begin
-		  X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+4*(P.D_LEN)-1:8+3*(P.D_LEN)]};
-		  Y = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
-		  Z = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+P.D_LEN]};
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+4*(P.D_LEN)-1:8+3*(P.D_LEN)]};
+                  Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                  Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+P.D_LEN]};
                end
                else begin
-		  X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
-		  if (OpCtrl === `MUL_OPCTRL) Y = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]}; 
-		  else Y = {{P.Q_LEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)};
-		  if (OpCtrl === `MUL_OPCTRL) Z = {{P.Q_LEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}; 
-		  else Z = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]}; // X operand for non-FMA ops; same vector field the quad/single/half branches read
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]}; 
+                  else Y = {{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
                end
-               Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+               Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
             end
-            2'b00: if (P.F_SUPPORTED) begin // single
+            2'b00: if (P.S_SUPPORTED) begin // single
                if (OpCtrl === `FMA_OPCTRL) begin
-		  X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+4*(P.S_LEN)-1:8+3*(P.S_LEN)]};
-		  Y = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
-		  Z = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+P.S_LEN]};
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+4*(P.S_LEN)-1:8+3*(P.S_LEN)]};
+                  Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+P.S_LEN]};
                end
                else begin
-		  X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
-		  if (OpCtrl === `MUL_OPCTRL) Y = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]}; 
-		  else Y = {{P.Q_LEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)};
-		  if (OpCtrl === `MUL_OPCTRL) Z = {{P.Q_LEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}; 
-		  else Z = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]};
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]}; 
+                  else Y = {{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]};
                end
-               Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+               Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
             end
             2'b10: begin // half
                if (OpCtrl === `FMA_OPCTRL) begin
-		  X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+4*(P.H_LEN)-1:8+3*(P.H_LEN)]};
-		  Y = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
-		  Z = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+P.H_LEN]};
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+4*(P.H_LEN)-1:8+3*(P.H_LEN)]};
+                  Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+P.H_LEN]};
                end
                else begin
-		  X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
-		  if (OpCtrl === `MUL_OPCTRL) Y = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]}; 
-		  else Y = {{P.Q_LEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)};
-		  if (OpCtrl === `MUL_OPCTRL) Z = {{P.Q_LEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}; 
-		  else Z = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]}; 
+                  else Y = {{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
                end
-               Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+               Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
             end
           endcase
-	`DIVUNIT:
+        `DIVUNIT:
           if (OpCtrl[0])
             case (Fmt)
               2'b11: begin // quad
-		 X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
-		 Ans = TestVector[8+(P.Q_LEN-1):8];
+                 X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
               end
               2'b01: if (P.D_SUPPORTED) begin // double
-		 X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
-		 Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
               end
-              2'b00: if (P.F_SUPPORTED) begin // single
-		 X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
-		 Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
               end
               2'b10: begin // half
-		 X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
-		 Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
               end
             endcase
           else
             case (Fmt)
               2'b11: begin // quad
-		 X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
-		 Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
-		 Ans = TestVector[8+(P.Q_LEN-1):8];
+                 X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                 Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
               end
               2'b01: if (P.D_SUPPORTED) begin // double
-		 X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
-		 Y = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
-		 Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                 Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
               end
-              2'b00: if (P.F_SUPPORTED) begin // single
-		 X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
-		 Y = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
-		 Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                 Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
               end
               2'b10: begin // half
-		 X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
-		 Y = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
-		 Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                 Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
               end
             endcase
-	`CMPUNIT:
+         `INTDIVUNIT: begin
+            if (!(OpCtrl === `DIV_OPCTRL | OpCtrl === `SQRT_OPCTRL)) begin
+               SrcA = TestVector[2*(P.Q_LEN)+P.XLEN-1+12:2*(P.Q_LEN)+12]; 
+               SrcB = TestVector[(P.Q_LEN)+P.XLEN-1+12:P.Q_LEN+12];
+               Ans = TestVector[P.XLEN-1+12:12];
+               // no flag checking for intdiv test cases
+               AnsFlg = 5'bx;
+               case (OpCtrl)
+               `INTDIV_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b0;
+               end
+               `INTREM_OPCTRL: begin
+                  Funct3E = 3'b110;
+                  W64 = 1'b0;
+               end
+               `INTREMU_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b0;
+               end
+               `INTDIVU_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b0;
+               end
+               `INTDIVW_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b1;
+               end
+               `INTDIVUW_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b1;
+               end
+               `INTREMW_OPCTRL: begin
+                     Funct3E = 3'b110;
+                     W64 = 1'b1;
+               end
+               `INTREMUW_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b1;
+               end
+               default: begin
+                  Funct3E = 3'b000;
+                  W64 = 1'b0;
+               end
+               endcase
+            end
+            // testing div/sqrt on drsu
+            else begin
+               if (OpCtrl[0])
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+               else
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                     Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                     Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                     Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                     Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+            end
+         end
+        `CMPUNIT:
           case (Fmt)        
             2'b11: begin // quad
                X = TestVector[12+2*(P.Q_LEN)-1:12+(P.Q_LEN)];
                Y = TestVector[12+(P.Q_LEN)-1:12];
-               Ans = {{P.Q_LEN-1{1'b0}}, TestVector[8]};
+               Ans = TestVector[8];
             end
             2'b01: if (P.D_SUPPORTED) begin // double
-               X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[12+2*(P.D_LEN)-1:12+(P.D_LEN)]};
-               Y = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[12+(P.D_LEN)-1:12]};
-               Ans = {{P.Q_LEN-1{1'b0}}, TestVector[8]};
+               X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+2*(P.D_LEN)-1:12+(P.D_LEN)]};
+               Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+(P.D_LEN)-1:12]};
+               Ans = TestVector[8];
             end
-            2'b00: if (P.F_SUPPORTED) begin // single
-               X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[12+2*(P.S_LEN)-1:12+(P.S_LEN)]};
-               Y = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[12+(P.S_LEN)-1:12]};
-               Ans = {{P.Q_LEN-1{1'b0}}, TestVector[8]};
+            2'b00: if (P.S_SUPPORTED) begin // single
+               X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+2*(P.S_LEN)-1:12+(P.S_LEN)]};
+               Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+(P.S_LEN)-1:12]};
+               Ans = TestVector[8];
             end
             2'b10: begin // half
-               X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[12+2*(P.H_LEN)-1:12+(P.H_LEN)]};
-               Y = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[12+(P.H_LEN)-1:12]};
-               Ans = {{P.Q_LEN-1{1'b0}}, TestVector[8]};
+               X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+2*(P.H_LEN)-1:12+(P.H_LEN)]};
+               Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+(P.H_LEN)-1:12]};
+               Ans = TestVector[8];
             end
           endcase
-	`CVTFPUNIT:
+        `CVTFPUNIT:
           case (Fmt)
             2'b11: begin // quad
                case (OpCtrl[1:0])
-		 2'b11: begin // quad
-		    X = {TestVector[8+P.Q_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
-		    Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b01:	if (P.D_SUPPORTED) begin // double
-		    X = {TestVector[8+P.Q_LEN+P.D_LEN-1:8+(P.D_LEN)]};
-		    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b00:	begin // single
-		    X = {TestVector[8+P.Q_LEN+P.S_LEN-1:8+(P.S_LEN)]};
-		    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b10:	begin // half
-		    X = {TestVector[8+P.Q_LEN+P.H_LEN-1:8+(P.H_LEN)]};
-		    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
+                 2'b11: begin // quad
+                    X = {TestVector[8+P.Q_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {TestVector[8+P.Q_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {TestVector[8+P.Q_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {TestVector[8+P.Q_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
                endcase
             end
             2'b01: if (P.D_SUPPORTED) begin // double
                case (OpCtrl[1:0])
-		 2'b11: begin // quad
-		    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
-		    Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b01:	begin // double
-		    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.D_LEN-1:8+(P.D_LEN)]};
-		    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b00:	begin // single
-		    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.S_LEN-1:8+(P.S_LEN)]};
-		    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b10:	begin // half
-		    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.H_LEN-1:8+(P.H_LEN)]};
-		    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	begin // double
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
                endcase
             end
-            2'b00: if (P.F_SUPPORTED) begin // single
+            2'b00: if (P.S_SUPPORTED) begin // single
                case (OpCtrl[1:0])
-		 2'b11: begin // quad
-		    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
-		    Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b01:	if (P.D_SUPPORTED) begin // double
-		    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.D_LEN-1:8+(P.D_LEN)]};
-		    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b00:	begin // single
-		    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.S_LEN-1:8+(P.S_LEN)]};
-		    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b10:	begin // half
-		    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.H_LEN-1:8+(P.H_LEN)]};
-		    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	begin // single
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
                endcase
             end
             2'b10: begin // half
                case (OpCtrl[1:0])
-		 2'b11: begin // quad
-		    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
-		    Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b01:	if (P.D_SUPPORTED) begin // double
-		    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.D_LEN-1:8+(P.D_LEN)]};
-		    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b00:	if (P.F_SUPPORTED) begin // single
-		    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.S_LEN-1:8+(P.S_LEN)]};
-		    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b10:	begin // half
-		    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.H_LEN-1:8+(P.H_LEN)]};
-		    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01:	if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00:	if (P.S_SUPPORTED) begin // single
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // half
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
                endcase
             end
           endcase        
-	`CVTINTUNIT:
+        `CVTINTUNIT:
           case (Fmt)
             2'b11: begin // quad
                // {is the integer a long, is the opperation to an integer}
-               casez ({OpCtrl[2:1]})
-		 2'b11: begin // long -> quad
-                    X = {P.Q_LEN{1'bx}};
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> quad
+                    X = {P.FLEN{1'bx}};
                     SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)];
                     Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b10:	begin // int -> quad
+                 end
+                 2'b10:	begin // int -> quad
                     // correctly sign extend the integer depending on if it's a signed/unsigned test
-                    X = {P.Q_LEN{1'bx}};
+                    X = {P.FLEN{1'bx}};
                     SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]};
                     Ans = TestVector[8+(P.Q_LEN-1):8];
-		 end
-		 2'b01:	begin // quad -> long
+                 end
+                 2'b01:	begin // quad -> long
                     X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]};
-		 end
-		 2'b00:	begin // quad -> int
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // quad -> int
                     X = {TestVector[8+32+P.Q_LEN-1:8+(32)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}},TestVector[8+(32-1):8]};
-		 end
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
                endcase
             end
             2'b01: if (P.D_SUPPORTED) begin // double
                // {Int->Fp?, is the integer a long}
-               casez ({OpCtrl[2:1]})
-		 2'b11: begin // long -> double
-                    X = {P.Q_LEN{1'bx}};
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> double
+                    X = {P.FLEN{1'bx}};
                     SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)];
-                    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b10:	begin // int -> double
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> double
                     // correctly sign extend the integer depending on if it's a signed/unsigned test
-                    X = {P.Q_LEN{1'bx}};
+                    X = {P.FLEN{1'bx}};
                     SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]};
-                    Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
-		 end
-		 2'b01:	begin // double -> long
-                    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b01:	begin // double -> long
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]};
-		 end
-		 2'b00:	begin // double -> int
-                    X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // double -> int
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{P.Q_LEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
-		 end
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
                endcase
             end
-            2'b00: if (P.F_SUPPORTED) begin // single
+            2'b00: if (P.S_SUPPORTED) begin // single
                // {is the integer a long, is the opperation to an integer}
-               casez ({OpCtrl[2:1]})
-		 2'b11: begin // long -> single
-                    X = {P.Q_LEN{1'bx}};
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> single
+                    X = {P.FLEN{1'bx}};
                     SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)];
-                    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b10:	begin // int -> single
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> single
                     // correctly sign extend the integer depending on if it's a signed/unsigned test
-                    X = {P.Q_LEN{1'bx}};
+                    X = {P.FLEN{1'bx}};
                     SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]};
-                    Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
-		 end
-		 2'b01:	begin // single -> long
-                    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b01:	begin // single -> long
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]};
-		 end
-		 2'b00:	begin // single -> int
-                    X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // single -> int
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}},TestVector[8+(32-1):8]};
-		 end
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
                endcase
             end
             2'b10: begin // half
                // {is the integer a long, is the opperation to an integer}
-               casez ({OpCtrl[2:1]})
-		 2'b11: begin // long -> half
-                    X = {P.Q_LEN{1'bx}};
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> half
+                    X = {P.FLEN{1'bx}};
                     SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)];
-                    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
-		 2'b10:	begin // int -> half
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b10:	begin // int -> half
                     // correctly sign extend the integer depending on if it's a signed/unsigned test
-                    X = {P.Q_LEN{1'bx}};
+                    X = {P.FLEN{1'bx}};
                     SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]};
-                    Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
-		 end
-		 2'b01:	begin // half -> long
-                    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b01:	begin // half -> long
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]};
-		 end
-		 2'b00:	begin // half -> int
-                    X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00:	begin // half -> int
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]};
                     SrcA = {P.XLEN{1'bx}};
-                    Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
-		 end
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
+                 end
                endcase
             end
           endcase
@@ -1382,13 +1686,13 @@ module readvectors import cvw::*; #(parameter cvw_t P) (
    end
 
    assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]);
-   assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]));
+   assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL));
    assign ZEn = (Unit == `FMAUNIT);
    assign FPUActive = 1'b1;
    
-   unpack #(P) unpack(.X(X[P.FLEN-1:0]), .Y(Y[P.FLEN-1:0]), .Z(Z[P.FLEN-1:0]), .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
+   unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
                       .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
                       .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
-                      .XEn, .YEn, .ZEn, .XExpMax, .XPostBox, .Bias(BiasE), .Nf(NfE));
+                      .XEn, .YEn, .ZEn, .XExpMax, .XPostBox);
 
 endmodule
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
new file mode 100644
index 000000000..64babccd4
--- /dev/null
+++ b/testbench/tests-fp.vh
@@ -0,0 +1,639 @@
+//////////////////////////////////////////
+// tests-fp.vh
+//
+// Written: Katherine Parry 2022
+// Modified: 
+//
+// Purpose: List of floating-point tests to apply
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021-3 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`define PATH "../../tests/fp/vectors/" // relative directory containing the .tv vector files
+`define ADD_OPCTRL     4'b0110 // OpCtrl codes follow; values repeat across groups (e.g. FMA/DIV/TO_UI are all 0000)
+`define MUL_OPCTRL     4'b0100
+`define SUB_OPCTRL     4'b0111
+`define FMA_OPCTRL     4'b0000
+`define DIV_OPCTRL     4'b0000 // FP divide
+`define SQRT_OPCTRL    4'b0001 // FP square root
+`define LE_OPCTRL      4'b0011 // compare codes (LE/LT/EQ)
+`define LT_OPCTRL      4'b0001
+`define EQ_OPCTRL      4'b0010
+`define TO_UI_OPCTRL   4'b0000 // fp<->int conversion codes: bit 2 = int->fp, bit 1 = long, bit 0 = signed
+`define TO_I_OPCTRL    4'b0001
+`define TO_UL_OPCTRL   4'b0010
+`define TO_L_OPCTRL    4'b0011
+`define FROM_UI_OPCTRL 4'b0100
+`define FROM_I_OPCTRL  4'b0101
+`define FROM_UL_OPCTRL 4'b0110
+`define FROM_L_OPCTRL  4'b0111
+`define INTREMU_OPCTRL 4'b1001 // integer divide/remainder codes; every one has bit 3 set
+`define INTREM_OPCTRL  4'b1010
+`define INTDIV_OPCTRL  4'b1011
+`define INTDIVW_OPCTRL 4'b1100
+`define INTDIVU_OPCTRL 4'b1101
+`define INTREMW_OPCTRL 4'b1110
+`define INTREMUW_OPCTRL 4'b1111
+`define INTDIVUW_OPCTRL 4'b1000
+`define RNE            3'b000 // rounding-mode codes; names match the _rne/_rz/_ru/_rd/_rnm file suffixes
+`define RZ             3'b001
+`define RU             3'b011
+`define RD             3'b010
+`define RNM            3'b100
+`define FMAUNIT        2 // unit selectors
+`define DIVUNIT        1
+`define CVTINTUNIT     0
+`define CVTFPUNIT      4
+`define CMPUNIT        3
+`define DIVREMSQRTUNIT 5
+`define INTDIVUNIT     6
+
+string f16rv32cvtint[] = '{ // f16 <-> 32-bit integer (ui32/i32) conversion vectors, 5 rounding modes each
+	"ui32_to_f16_rne.tv",
+	"ui32_to_f16_rz.tv",
+	"ui32_to_f16_ru.tv",
+	"ui32_to_f16_rd.tv",
+	"ui32_to_f16_rnm.tv",
+	"i32_to_f16_rne.tv",
+	"i32_to_f16_rz.tv",
+	"i32_to_f16_ru.tv",
+	"i32_to_f16_rd.tv",
+	"i32_to_f16_rnm.tv",
+	"f16_to_ui32_rne.tv",
+	"f16_to_ui32_rz.tv",
+	"f16_to_ui32_ru.tv",
+	"f16_to_ui32_rd.tv",
+	"f16_to_ui32_rnm.tv",
+	"f16_to_i32_rne.tv",
+	"f16_to_i32_rz.tv",
+	"f16_to_i32_ru.tv",
+	"f16_to_i32_rd.tv",
+	"f16_to_i32_rnm.tv"
+};
+
+string f16rv64cvtint[] = '{ // f16 <-> 64-bit integer (ui64/i64) conversion vectors, 5 rounding modes each
+	"ui64_to_f16_rne.tv",
+	"ui64_to_f16_rz.tv",
+	"ui64_to_f16_ru.tv",
+	"ui64_to_f16_rd.tv",
+	"ui64_to_f16_rnm.tv",
+	"i64_to_f16_rne.tv",
+	"i64_to_f16_rz.tv",
+	"i64_to_f16_ru.tv",
+	"i64_to_f16_rd.tv",
+	"i64_to_f16_rnm.tv",
+	"f16_to_ui64_rne.tv",
+	"f16_to_ui64_rz.tv",
+	"f16_to_ui64_ru.tv",
+	"f16_to_ui64_rd.tv",
+	"f16_to_ui64_rnm.tv",
+	"f16_to_i64_rne.tv",
+	"f16_to_i64_rz.tv",
+	"f16_to_i64_ru.tv",
+	"f16_to_i64_rd.tv",
+	"f16_to_i64_rnm.tv"
+};
+
+string f32rv32cvtint[] = '{ // f32 <-> 32-bit integer (ui32/i32) conversion vectors, 5 rounding modes each
+	"ui32_to_f32_rne.tv",
+	"ui32_to_f32_rz.tv",
+	"ui32_to_f32_ru.tv",
+	"ui32_to_f32_rd.tv",
+	"ui32_to_f32_rnm.tv",
+	"i32_to_f32_rne.tv",
+	"i32_to_f32_rz.tv",
+	"i32_to_f32_ru.tv",
+	"i32_to_f32_rd.tv",
+	"i32_to_f32_rnm.tv",
+	"f32_to_ui32_rne.tv",
+	"f32_to_ui32_rz.tv",
+	"f32_to_ui32_ru.tv",
+	"f32_to_ui32_rd.tv",
+	"f32_to_ui32_rnm.tv",
+	"f32_to_i32_rne.tv",
+	"f32_to_i32_rz.tv",
+	"f32_to_i32_ru.tv",
+	"f32_to_i32_rd.tv",
+	"f32_to_i32_rnm.tv"
+};
+
+string f32rv64cvtint[] = '{ // f32 <-> 64-bit integer (ui64/i64) conversion vectors, 5 rounding modes each
+	"ui64_to_f32_rne.tv",
+	"ui64_to_f32_rz.tv",
+	"ui64_to_f32_ru.tv",
+	"ui64_to_f32_rd.tv",
+	"ui64_to_f32_rnm.tv",
+	"i64_to_f32_rne.tv",
+	"i64_to_f32_rz.tv",
+	"i64_to_f32_ru.tv",
+	"i64_to_f32_rd.tv",
+	"i64_to_f32_rnm.tv",
+	"f32_to_ui64_rne.tv",
+	"f32_to_ui64_rz.tv",
+	"f32_to_ui64_ru.tv",
+	"f32_to_ui64_rd.tv",
+	"f32_to_ui64_rnm.tv",
+	"f32_to_i64_rne.tv",
+	"f32_to_i64_rz.tv",
+	"f32_to_i64_ru.tv",
+	"f32_to_i64_rd.tv",
+	"f32_to_i64_rnm.tv"
+};
+
+
+string f64rv32cvtint[] = '{ // f64 <-> 32-bit integer (ui32/i32) conversion vectors, 5 rounding modes each
+	"ui32_to_f64_rne.tv",
+	"ui32_to_f64_rz.tv",
+	"ui32_to_f64_ru.tv",
+	"ui32_to_f64_rd.tv",
+	"ui32_to_f64_rnm.tv",
+	"i32_to_f64_rne.tv",
+	"i32_to_f64_rz.tv",
+	"i32_to_f64_ru.tv",
+	"i32_to_f64_rd.tv",
+	"i32_to_f64_rnm.tv",
+	"f64_to_ui32_rne.tv",
+	"f64_to_ui32_rz.tv",
+	"f64_to_ui32_ru.tv",
+	"f64_to_ui32_rd.tv",
+	"f64_to_ui32_rnm.tv",
+	"f64_to_i32_rne.tv",
+	"f64_to_i32_rz.tv",
+	"f64_to_i32_ru.tv",
+	"f64_to_i32_rd.tv",
+	"f64_to_i32_rnm.tv"
+};
+
+string f64rv64cvtint[] = '{ // f64 <-> 64-bit integer (ui64/i64) conversion vectors, 5 rounding modes each
+	"ui64_to_f64_rne.tv",
+	"ui64_to_f64_rz.tv",
+	"ui64_to_f64_ru.tv",
+	"ui64_to_f64_rd.tv",
+	"ui64_to_f64_rnm.tv",
+	"i64_to_f64_rne.tv",
+	"i64_to_f64_rz.tv",
+	"i64_to_f64_ru.tv",
+	"i64_to_f64_rd.tv",
+	"i64_to_f64_rnm.tv",
+	"f64_to_ui64_rne.tv",
+	"f64_to_ui64_rz.tv",
+	"f64_to_ui64_ru.tv",
+	"f64_to_ui64_rd.tv",
+	"f64_to_ui64_rnm.tv",
+	"f64_to_i64_rne.tv",
+	"f64_to_i64_rz.tv",
+	"f64_to_i64_ru.tv",
+	"f64_to_i64_rd.tv",
+	"f64_to_i64_rnm.tv"
+};
+
+string f128rv64cvtint[] = '{ // f128 <-> 64-bit integer (ui64/i64) conversion vectors, 5 rounding modes each
+	"ui64_to_f128_rne.tv",
+	"ui64_to_f128_rz.tv",
+	"ui64_to_f128_ru.tv",
+	"ui64_to_f128_rd.tv",
+	"ui64_to_f128_rnm.tv",
+	"i64_to_f128_rne.tv",
+	"i64_to_f128_rz.tv",
+	"i64_to_f128_ru.tv",
+	"i64_to_f128_rd.tv",
+	"i64_to_f128_rnm.tv",
+	"f128_to_ui64_rne.tv",
+	"f128_to_ui64_rz.tv",
+	"f128_to_ui64_ru.tv",
+	"f128_to_ui64_rd.tv",
+	"f128_to_ui64_rnm.tv",
+	"f128_to_i64_rne.tv",
+	"f128_to_i64_rz.tv",
+	"f128_to_i64_ru.tv",
+	"f128_to_i64_rd.tv",
+	"f128_to_i64_rnm.tv"
+};
+
+string f128rv32cvtint[] = '{ // f128 <-> 32-bit integer (ui32/i32) conversion vectors, 5 rounding modes each
+	"ui32_to_f128_rne.tv",
+	"ui32_to_f128_rz.tv",
+	"ui32_to_f128_ru.tv",
+	"ui32_to_f128_rd.tv",
+	"ui32_to_f128_rnm.tv",
+	"i32_to_f128_rne.tv",
+	"i32_to_f128_rz.tv",
+	"i32_to_f128_ru.tv",
+	"i32_to_f128_rd.tv",
+	"i32_to_f128_rnm.tv",
+	"f128_to_ui32_rne.tv",
+	"f128_to_ui32_rz.tv",
+	"f128_to_ui32_ru.tv",
+	"f128_to_ui32_rd.tv",
+	"f128_to_ui32_rnm.tv",
+	"f128_to_i32_rne.tv",
+	"f128_to_i32_rz.tv",
+	"f128_to_i32_ru.tv",
+	"f128_to_i32_rd.tv",
+	"f128_to_i32_rnm.tv"
+};
+
+string f32f16cvt[] = '{ // f32 <-> f16 format-conversion vectors, both directions, 5 rounding modes each
+	"f32_to_f16_rne.tv",
+	"f32_to_f16_rz.tv",
+	"f32_to_f16_ru.tv",
+	"f32_to_f16_rd.tv",
+	"f32_to_f16_rnm.tv",
+	"f16_to_f32_rne.tv",
+	"f16_to_f32_rz.tv",
+	"f16_to_f32_ru.tv",
+	"f16_to_f32_rd.tv",
+	"f16_to_f32_rnm.tv"
+};
+
+string f64f16cvt[] = '{ // f64 <-> f16 format-conversion vectors, both directions, 5 rounding modes each
+	"f64_to_f16_rne.tv",
+	"f64_to_f16_rz.tv",
+	"f64_to_f16_ru.tv",
+	"f64_to_f16_rd.tv",
+	"f64_to_f16_rnm.tv",
+	"f16_to_f64_rne.tv",
+	"f16_to_f64_rz.tv",
+	"f16_to_f64_ru.tv",
+	"f16_to_f64_rd.tv",
+	"f16_to_f64_rnm.tv"
+};
+
+string f128f16cvt[] = '{ // f128 <-> f16 format-conversion vectors, both directions, 5 rounding modes each
+	"f128_to_f16_rne.tv",
+	"f128_to_f16_rz.tv",
+	"f128_to_f16_ru.tv",
+	"f128_to_f16_rd.tv",
+	"f128_to_f16_rnm.tv",
+	"f16_to_f128_rne.tv",
+	"f16_to_f128_rz.tv",
+	"f16_to_f128_ru.tv",
+	"f16_to_f128_rd.tv",
+	"f16_to_f128_rnm.tv"
+};
+
+string f64f32cvt[] = '{ // f64 <-> f32 format-conversion vectors, both directions, 5 rounding modes each
+	"f64_to_f32_rne.tv",
+	"f64_to_f32_rz.tv",
+	"f64_to_f32_ru.tv",
+	"f64_to_f32_rd.tv",
+	"f64_to_f32_rnm.tv",
+	"f32_to_f64_rne.tv",
+	"f32_to_f64_rz.tv",
+	"f32_to_f64_ru.tv",
+	"f32_to_f64_rd.tv",
+	"f32_to_f64_rnm.tv"
+};
+
+string f128f32cvt[] = '{ // f128 <-> f32 format-conversion vectors, both directions, 5 rounding modes each
+	"f128_to_f32_rne.tv",
+	"f128_to_f32_rz.tv",
+	"f128_to_f32_ru.tv",
+	"f128_to_f32_rd.tv",
+	"f128_to_f32_rnm.tv",
+	"f32_to_f128_rne.tv",
+	"f32_to_f128_rz.tv",
+	"f32_to_f128_ru.tv",
+	"f32_to_f128_rd.tv",
+	"f32_to_f128_rnm.tv"
+};
+
+string f128f64cvt[] = '{ // f128 <-> f64 format-conversion vectors, both directions, 5 rounding modes each
+	"f128_to_f64_rne.tv",
+	"f128_to_f64_rz.tv",
+	"f128_to_f64_ru.tv",
+	"f128_to_f64_rd.tv",
+	"f128_to_f64_rnm.tv",
+	"f64_to_f128_rne.tv",
+	"f64_to_f128_rz.tv",
+	"f64_to_f128_ru.tv",
+	"f64_to_f128_rd.tv",
+	"f64_to_f128_rnm.tv"
+};
+
+string f16add[] = '{ // f16 addition vectors, one file per rounding mode
+	"f16_add_rne.tv",
+	"f16_add_rz.tv",
+	"f16_add_ru.tv",
+	"f16_add_rd.tv",
+	"f16_add_rnm.tv"
+};
+
+string f32add[] = '{ // f32 addition vectors, one file per rounding mode
+	"f32_add_rne.tv",
+	"f32_add_rz.tv",
+	"f32_add_ru.tv",
+	"f32_add_rd.tv",
+	"f32_add_rnm.tv"
+};
+
+string f64add[] = '{ // f64 addition vectors, one file per rounding mode
+	"f64_add_rne.tv",
+	"f64_add_rz.tv",
+	"f64_add_ru.tv",
+	"f64_add_rd.tv",
+	"f64_add_rnm.tv"
+};
+
+string f128add[] = '{ // f128 addition vectors, one file per rounding mode
+	"f128_add_rne.tv",
+	"f128_add_rz.tv",
+	"f128_add_ru.tv",
+	"f128_add_rd.tv",
+	"f128_add_rnm.tv"
+};
+
+string f16sub[] = '{ // f16 subtraction vectors, one file per rounding mode
+	"f16_sub_rne.tv",
+	"f16_sub_rz.tv",
+	"f16_sub_ru.tv",
+	"f16_sub_rd.tv",
+	"f16_sub_rnm.tv"
+};
+
+string f32sub[] = '{ // f32 subtraction vectors, one file per rounding mode
+	"f32_sub_rne.tv",
+	"f32_sub_rz.tv",
+	"f32_sub_ru.tv",
+	"f32_sub_rd.tv",
+	"f32_sub_rnm.tv"
+};
+
+string f64sub[] = '{ // f64 subtraction vectors, one file per rounding mode
+	"f64_sub_rne.tv",
+	"f64_sub_rz.tv",
+	"f64_sub_ru.tv",
+	"f64_sub_rd.tv",
+	"f64_sub_rnm.tv"
+};
+
+string f128sub[] = '{ // f128 subtraction vectors, one file per rounding mode
+	"f128_sub_rne.tv",
+	"f128_sub_rz.tv",
+	"f128_sub_ru.tv",
+	"f128_sub_rd.tv",
+	"f128_sub_rnm.tv"
+};
+
+string f16mul[] = '{ // f16 multiplication vectors, one file per rounding mode
+	"f16_mul_rne.tv",
+	"f16_mul_rz.tv",
+	"f16_mul_ru.tv",
+	"f16_mul_rd.tv",
+	"f16_mul_rnm.tv"
+};
+
+string f32mul[] = '{ // f32 multiplication vectors, one file per rounding mode
+	"f32_mul_rne.tv",
+	"f32_mul_rz.tv",
+	"f32_mul_ru.tv",
+	"f32_mul_rd.tv",
+	"f32_mul_rnm.tv"
+};
+
+string f64mul[] = '{ // f64 multiplication vectors, one file per rounding mode
+	"f64_mul_rne.tv",
+	"f64_mul_rz.tv",
+	"f64_mul_ru.tv",
+	"f64_mul_rd.tv",
+	"f64_mul_rnm.tv"
+};
+
+string f128mul[] = '{ // f128 multiplication vectors, one file per rounding mode
+	"f128_mul_rne.tv",
+	"f128_mul_rz.tv",
+	"f128_mul_ru.tv",
+	"f128_mul_rd.tv",
+	"f128_mul_rnm.tv"
+};
+
+string f16div[] = '{ // f16 division vectors, one file per rounding mode
+	"f16_div_rne.tv",
+	"f16_div_rz.tv",
+	"f16_div_ru.tv",
+	"f16_div_rd.tv",
+	"f16_div_rnm.tv"
+};
+
+string f32div[] = '{ // f32 division vectors, one file per rounding mode
+	"f32_div_rne.tv",
+	"f32_div_rz.tv",
+	"f32_div_ru.tv",
+	"f32_div_rd.tv",
+	"f32_div_rnm.tv"
+};
+
+string f64div[] = '{ // f64 division vectors, one file per rounding mode
+	"f64_div_rne.tv",
+	"f64_div_rz.tv",
+	"f64_div_ru.tv",
+	"f64_div_rd.tv",
+	"f64_div_rnm.tv"
+};
+
+string f128div[] = '{ // f128 division vectors, one file per rounding mode
+	"f128_div_rne.tv",
+	"f128_div_rz.tv",
+	"f128_div_ru.tv",
+	"f128_div_rd.tv",
+	"f128_div_rnm.tv"
+};
+
+string f16sqrt[] = '{ // f16 square-root vectors, one file per rounding mode
+	"f16_sqrt_rne.tv",
+	"f16_sqrt_rz.tv",
+	"f16_sqrt_ru.tv",
+	"f16_sqrt_rd.tv",
+	"f16_sqrt_rnm.tv"
+};
+
+string f32sqrt[] = '{
+	"f32_sqrt_rne.tv",
+	"f32_sqrt_rz.tv",
+	"f32_sqrt_ru.tv",
+	"f32_sqrt_rd.tv",
+	"f32_sqrt_rnm.tv"
+};
+
+string f64sqrt[] = '{
+	"f64_sqrt_rne.tv",
+	"f64_sqrt_rz.tv",
+	"f64_sqrt_ru.tv",
+	"f64_sqrt_rd.tv",
+	"f64_sqrt_rnm.tv"
+};
+
+string f128sqrt[] = '{
+	"f128_sqrt_rne.tv",
+	"f128_sqrt_rz.tv",
+	"f128_sqrt_ru.tv",
+	"f128_sqrt_rd.tv",
+	"f128_sqrt_rnm.tv"
+};
+
+string f16cmp[] = '{
+	"f16_eq_rne.tv",
+	"f16_eq_rz.tv",
+	"f16_eq_ru.tv",
+	"f16_eq_rd.tv",
+	"f16_eq_rnm.tv",
+	"f16_le_rne.tv",
+	"f16_le_rz.tv",
+	"f16_le_ru.tv",
+	"f16_le_rd.tv",
+	"f16_le_rnm.tv",
+	"f16_lt_rne.tv",
+	"f16_lt_rz.tv",
+	"f16_lt_ru.tv",
+	"f16_lt_rd.tv",
+	"f16_lt_rnm.tv"
+};
+
+string f32cmp[] = '{
+	"f32_eq_rne.tv",
+	"f32_eq_rz.tv",
+	"f32_eq_ru.tv",
+	"f32_eq_rd.tv",
+	"f32_eq_rnm.tv",
+	"f32_le_rne.tv",
+	"f32_le_rz.tv",
+	"f32_le_ru.tv",
+	"f32_le_rd.tv",
+	"f32_le_rnm.tv",
+	"f32_lt_rne.tv",
+	"f32_lt_rz.tv",
+	"f32_lt_ru.tv",
+	"f32_lt_rd.tv",
+	"f32_lt_rnm.tv"
+};
+
+string f64cmp[] = '{
+	"f64_eq_rne.tv",
+	"f64_eq_rz.tv",
+	"f64_eq_ru.tv",
+	"f64_eq_rd.tv",
+	"f64_eq_rnm.tv",
+	"f64_le_rne.tv",
+	"f64_le_rz.tv",
+	"f64_le_ru.tv",
+	"f64_le_rd.tv",
+	"f64_le_rnm.tv",
+	"f64_lt_rne.tv",
+	"f64_lt_rz.tv",
+	"f64_lt_ru.tv",
+	"f64_lt_rd.tv",
+	"f64_lt_rnm.tv"
+};
+
+string f128cmp[] = '{
+	"f128_eq_rne.tv",
+	"f128_eq_rz.tv",
+	"f128_eq_ru.tv",
+	"f128_eq_rd.tv",
+	"f128_eq_rnm.tv",
+	"f128_le_rne.tv",
+	"f128_le_rz.tv",
+	"f128_le_ru.tv",
+	"f128_le_rd.tv",
+	"f128_le_rnm.tv",
+	"f128_lt_rne.tv",
+	"f128_lt_rz.tv",
+	"f128_lt_ru.tv",
+	"f128_lt_rd.tv",
+	"f128_lt_rnm.tv"
+};
+
+string f16fma[] = '{
+	"f16_mulAdd_rne.tv",
+	"f16_mulAdd_rz.tv",
+	"f16_mulAdd_ru.tv",
+	"f16_mulAdd_rd.tv",
+	"f16_mulAdd_rnm.tv"
+};
+
+string f32fma[] = '{
+	"f32_mulAdd_rne.tv",
+	"f32_mulAdd_rz.tv",
+	"f32_mulAdd_ru.tv",
+	"f32_mulAdd_rd.tv",
+	"f32_mulAdd_rnm.tv"
+};
+
+string f64fma[] = '{
+	"f64_mulAdd_rne.tv",
+	"f64_mulAdd_rz.tv",
+	"f64_mulAdd_ru.tv",
+	"f64_mulAdd_rd.tv",
+	"f64_mulAdd_rnm.tv"
+};
+
+string f128fma[] = '{
+	"f128_mulAdd_rne.tv",
+	"f128_mulAdd_rz.tv",
+	"f128_mulAdd_ru.tv",
+	"f128_mulAdd_rd.tv",
+	"f128_mulAdd_rnm.tv"
+};
+
+string int64rem[] = '{
+	"cvw_64_rem-01.tv"
+};
+
+string int64div[] = '{
+	"cvw_64_div-01.tv"
+};
+
+string int64remu[] = '{
+	"cvw_64_remu-01.tv"
+};
+
+string int64divu[] = '{
+	"cvw_64_divu-01.tv"
+};
+
+string int64remw[] = '{
+	"cvw_64_remw-01.tv"
+};
+
+string int64remuw[] = '{
+	"cvw_64_remuw-01.tv"
+};
+
+string int64divuw[] = '{
+	"cvw_64_divuw-01.tv"
+};
+
+string int64divw[] = '{
+	"cvw_64_divw-01.tv"
+};
+
+string int32rem[] = '{
+	"cvw_32_rem-01.tv"
+};
+
+string int32div[] = '{
+	"cvw_32_div-01.tv"
+};
+
+string int32remu[] = '{
+	"cvw_32_remu-01.tv"
+};
+
+string int32divu[] = '{
+	"cvw_32_divu-01.tv"
+};
diff --git a/tests/custom/spitest/Makefile b/tests/custom/spitest/Makefile
new file mode 100644
index 000000000..34e83a9e4
--- /dev/null
+++ b/tests/custom/spitest/Makefile
@@ -0,0 +1,112 @@
+CEXT		:= c
+CPPEXT		:= cpp
+AEXT		:= s
+SEXT		:= S
+SRCEXT 		:= \([$(CEXT)$(AEXT)$(SEXT)]\|$(CPPEXT)\)
+OBJEXT		:= o
+DEPEXT		:= d
+SRCDIR		:= .
+BUILDDIR	:= OBJ
+
+SOURCES		?= $(shell find $(SRCDIR) -type f -regex ".*\.$(SRCEXT)" | sort)
+OBJECTS		:= $(SOURCES:.$(CEXT)=.$(OBJEXT))
+OBJECTS		:= $(OBJECTS:.$(AEXT)=.$(OBJEXT))
+OBJECTS		:= $(OBJECTS:.$(SEXT)=.$(OBJEXT))
+OBJECTS		:= $(OBJECTS:.$(CPPEXT)=.$(OBJEXT))
+OBJECTS		:= $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(OBJECTS))
+
+TARGETDIR	:= bin
+TARGET		:= $(TARGETDIR)/spitest.elf
+ROOT		:= ..
+LIBRARY_DIRS	:= 
+LIBRARY_FILES	:=
+
+MARCH           :=-march=rv64imfdc
+MABI            :=-mabi=lp64d
+LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles
+LINKER		:=$(ROOT)/linker8000-0000.x
+
+
+AFLAGS =$(MARCH) $(MABI) -W
+CFLAGS =$(MARCH) $(MABI) -mcmodel=medany  -O2
+AS=riscv64-unknown-elf-as
+CC=riscv64-unknown-elf-gcc
+AR=riscv64-unknown-elf-ar
+
+.PHONY: all remake directories clean
+#Default Make (all/remake/directories/clean name actions, not files, hence .PHONY above)
+all: directories  $(TARGET).memfile
+
+#Remake
+remake: clean all
+
+#Make the Directories
+directories:
+	@mkdir -p $(TARGETDIR)
+	@mkdir -p $(BUILDDIR)
+
+clean:
+	rm -rf $(BUILDDIR) $(TARGETDIR) *.memfile *.objdump
+
+
+#Needed for building additional library projects
+ifdef LIBRARY_DIRS
+LIBS+=${LIBRARY_DIRS:%=-L%}  ${LIBRARY_FILES:%=-l%}
+INC+=${LIBRARY_DIRS:%=-I%}
+
+#Recurse with $(MAKE), not a literal make, so the sub-make is the same program and honors -n/-k/-t
+${LIBRARY_DIRS}:
+	$(MAKE) -C $@ -j 1
+.PHONY: $(LIBRARY_DIRS) $(TARGET)
+endif
+
+
+#Pull in dependency info for *existing* .o files
+-include $(OBJECTS:.$(OBJEXT)=.$(DEPEXT))
+
+#Link
+$(TARGET): $(OBJECTS) $(LIBRARY_DIRS)
+	$(CC) $(LINK_FLAGS) -g -o $(TARGET) $(OBJECTS) ${LIBS} -T ${LINKER}
+
+
+#Compile
+$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CEXT)
+	@mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
+	@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CEXT) > $(BUILDDIR)/$*.$(DEPEXT)
+	@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
+	@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
+	@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
+	@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
+
+# gcc won't output dependencies for assembly files for some reason
+# most asm files don't have dependencies so the echo will work for now.
+$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(AEXT)
+	@mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) -c -o $@ $< > $(BUILDDIR)/$*.list
+	@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
+
+$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SEXT)
+	@mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
+	@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
+
+# C++
+$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CPPEXT)
+	@mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
+	@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CPPEXT) > $(BUILDDIR)/$*.$(DEPEXT)
+	@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
+	@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
+	@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
+	@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
+
+# convert to hex
+$(TARGET).memfile: $(TARGET)
+	@echo 'Making object dump file.'
+	@riscv64-unknown-elf-objdump -D $< > $<.objdump
+	@echo 'Making memory file'
+	riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@
+	extractFunctionRadix.sh $<.objdump
+	mkdir -p ../work/
+	cp -f $(TARGETDIR)/* ../work/
diff --git a/tests/custom/spitest/spi.h b/tests/custom/spitest/spi.h
new file mode 100644
index 000000000..2b1d541da
--- /dev/null
+++ b/tests/custom/spitest/spi.h
@@ -0,0 +1,116 @@
+///////////////////////////////////////////////////////////////////////
+// spi.h
+//
+// Written: Jacob Pease jacob.pease@okstate.edu 7/22/2024
+//
+// Purpose: Header file for interfacing with the SPI peripheral
+//
+// 
+//
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the
+// “License”); you may not use this file except in compliance with the
+// License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work
+// distributed under the License is distributed on an “AS IS” BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#pragma once
+#ifndef SPI_HEADER
+#define SPI_HEADER
+
+#include <stdint.h>
+
+#define SPI_BASE              0x13000 /* Base address of SPI device used for SDC */
+
+/* register addresses (SPI_BASE + offset), parenthesized so they expand safely inside larger expressions */
+#define SPI_SCKDIV            (SPI_BASE + 0x00) /* Serial clock divisor */
+#define SPI_SCKMODE           (SPI_BASE + 0x04) /* Serial clock mode */
+#define SPI_CSID              (SPI_BASE + 0x10) /* Chip select ID */
+#define SPI_CSDEF             (SPI_BASE + 0x14) /* Chip select default */
+#define SPI_CSMODE            (SPI_BASE + 0x18) /* Chip select mode */
+#define SPI_DELAY0            (SPI_BASE + 0x28) /* Delay control 0 */
+#define SPI_DELAY1            (SPI_BASE + 0x2c) /* Delay control 1 */
+#define SPI_FMT               (SPI_BASE + 0x40) /* Frame format */
+#define SPI_TXDATA            (SPI_BASE + 0x48) /* Tx FIFO data */
+#define SPI_RXDATA            (SPI_BASE + 0x4c) /* Rx FIFO data */
+#define SPI_TXMARK            (SPI_BASE + 0x50) /* Tx FIFO watermark */
+#define SPI_RXMARK            (SPI_BASE + 0x54) /* Rx FIFO watermark */
+
+/* Non-implemented
+#define SPI_FCTRL             (SPI_BASE + 0x60) // SPI flash interface control
+#define SPI_FFMT              (SPI_BASE + 0x64) // SPI flash instruction format
+*/
+#define SPI_IE                (SPI_BASE + 0x70) /* Interrupt Enable Register */
+#define SPI_IP                (SPI_BASE + 0x74) /* Interrupt Pendings Register */
+
+/* delay0 bits */
+#define SIFIVE_SPI_DELAY0_CSSCK(x)       ((uint32_t)(x))
+#define SIFIVE_SPI_DELAY0_CSSCK_MASK     0xffU
+#define SIFIVE_SPI_DELAY0_SCKCS(x)       ((uint32_t)(x) << 16)
+#define SIFIVE_SPI_DELAY0_SCKCS_MASK     (0xffU << 16)
+
+/* delay1 bits */
+#define SIFIVE_SPI_DELAY1_INTERCS(x)     ((uint32_t)(x))
+#define SIFIVE_SPI_DELAY1_INTERCS_MASK   0xffU
+#define SIFIVE_SPI_DELAY1_INTERXFR(x)    ((uint32_t)(x) << 16)
+#define SIFIVE_SPI_DELAY1_INTERXFR_MASK  (0xffU << 16)
+
+/* csmode bits */
+#define SIFIVE_SPI_CSMODE_MODE_AUTO      0U
+#define SIFIVE_SPI_CSMODE_MODE_HOLD      2U
+#define SIFIVE_SPI_CSMODE_MODE_OFF       3U
+
+/* Out-of-line functions, defined in spitest.c. The register helpers
+ * (write_reg, read_reg, ...) are the static inline functions below. */
+/* Queue one byte for transmit and return a byte read from RXDATA */
+uint8_t spi_txrx(uint8_t byte);
+/* spi_txrx() with 0xff as the transmitted byte */
+uint8_t spi_dummy(void);
+/* Configure the controller. The prototype spells out the definition's
+ * parameter list so that calls are type-checked. */
+void spi_init(uint32_t clkin);
+/* Write (clkin/(2*clkout)) - 1 to SCKDIV */
+void spi_set_clock(uint32_t clkin, uint32_t clkout);
+
+// Write a register: one volatile 32-bit store to address addr
+static inline void write_reg(uintptr_t addr, uint32_t value) {
+  *(volatile uint32_t *) addr = value;
+}
+
+// Read a register: one volatile 32-bit load from address addr
+static inline uint32_t read_reg(uintptr_t addr) {
+  return *(volatile uint32_t *) addr;
+}
+
+// Queue one byte for transmission by writing it to the TXDATA register
+static inline void spi_sendbyte(uint8_t byte) {
+  const uint32_t txword = byte;   // widened to the 32-bit register access size
+  write_reg(SPI_TXDATA, txword);
+}
+
+// Spin until IP bit 0 (tx watermark pending) is set
+static inline void waittx() { while(!(read_reg(SPI_IP) & 1)) {} }
+
+// Spin until IP bit 1 (rx watermark pending) is set, so a read can follow
+static inline void waitrx() {
+  while(!(read_reg(SPI_IP) & 2)) {}
+}
+
+static inline uint8_t spi_readbyte() {
+  return (uint8_t) (read_reg(SPI_RXDATA) & 0xffU);  // low 8 bits of one RXDATA read
+}
+
+#endif
diff --git a/tests/custom/spitest/spitest.c b/tests/custom/spitest/spitest.c
new file mode 100644
index 000000000..23d408c16
--- /dev/null
+++ b/tests/custom/spitest/spitest.c
@@ -0,0 +1,107 @@
+///////////////////////////////////////////////////////////////////////
+// spitest.c
+//
+// Written: Jacob Pease jacob.pease@okstate.edu 8/27/2024
+//
+// Purpose: C code to test SPI bugs
+//
+// 
+//
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the
+// “License”); you may not use this file except in compliance with the
+// License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work
+// distributed under the License is distributed on an “AS IS” BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#include "spi.h"
+
+// Testing SPI peripheral in loopback mode
+// TODO: Need to make sure the configuration I'm using uses loopback
+//       mode. This can be specified in derivlists.txt
+// TODO:
+
+uint8_t spi_txrx(uint8_t byte) {
+  spi_sendbyte(byte);       // queue the byte in the tx fifo (TXDATA write)
+  waittx();                 // spin until IP bit 0 is set
+  return spi_readbyte();    // NOTE(review): only the tx side was waited on; confirm the rx byte is already in RXDATA here
+}
+
+uint8_t spi_dummy() {
+  return spi_txrx(0xff);    // transmit 0xff and return the byte spi_txrx reads back
+}
+
+void spi_set_clock(uint32_t clkin, uint32_t clkout) {
+  uint32_t div = (clkin/(2*clkout)) - 1;  // integer divide; NOTE(review): clkout == 0 divides by zero, and clkin < 2*clkout wraps div to 0xFFFFFFFF
+  write_reg(SPI_SCKDIV, div);             // program the serial clock divisor register
+}
+
+// Initialize Sifive FU540 based SPI Controller: enable interrupts, set the
+// fifo watermarks and delays, and program a fixed clock divisor.
+// clkin is not referenced by the body.
+void spi_init(uint32_t clkin) {
+  const uint32_t delay0 = SIFIVE_SPI_DELAY0_CSSCK(1) | SIFIVE_SPI_DELAY0_SCKCS(1);
+  const uint32_t delay1 = SIFIVE_SPI_DELAY1_INTERCS(1) | SIFIVE_SPI_DELAY1_INTERXFR(0);
+
+  // Interrupt enable register: set bits 0 and 1
+  write_reg(SPI_IE, 0x3);
+
+  // TXMARK = 1: IP's txwm field goes high when the number of entries is < 1.
+  // RXMARK = 0: IP's rxwm field goes high when the number of entries is > 0.
+  write_reg(SPI_TXMARK, 1);
+  write_reg(SPI_RXMARK, 0);
+
+  // Delay 0 default: cssck = 1, sckcs = 1
+  write_reg(SPI_DELAY0, delay0);
+
+  // Delay 1 default: intercs = 1, interxfr = 0
+  write_reg(SPI_DELAY1, delay1);
+
+  // Serial clock divisor, hard-coded as
+  //   div = (20MHz/(2*400kHz)) - 1 = 24 = 0x18
+  const uint32_t sckdiv = 0x18;
+  write_reg(SPI_SCKDIV, sckdiv);
+}
+
+void main() {
+  // Configure the controller, then reprogram the divisor for clkin = 100000000, clkout = 50000000
+  spi_init(100000000);
+  spi_set_clock(100000000,50000000);
+
+  // Received bytes are stored to consecutive addresses starting here
+  volatile uint8_t *dst = (uint8_t *)(0x8F000000);
+  uint64_t burst;
+  int i;
+
+  // Chip-select mode HOLD for the transfers; AUTO is restored at the end
+  write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_HOLD);
+
+  // Four bursts of eight bytes each
+  for (burst = 4; burst > 0; burst--) {
+    // Queue 8 bytes 0xaa..0xb1 (fifo should be empty)
+    for (i = 0; i < 8; i++)
+      spi_sendbyte(0xaa + i);
+
+    // Process bytes AS THEY COME IN: poll IP bit 1 before every read
+    for (i = 0; i < 8; i++) {
+      while ((read_reg(SPI_IP) & 2) == 0) {}
+      *dst = spi_readbyte();
+      dst++;
+    }
+  }
+
+  write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_AUTO);
+}
diff --git a/tests/custom/spitest/start.s b/tests/custom/spitest/start.s
new file mode 100644
index 000000000..57f66ce79
--- /dev/null
+++ b/tests/custom/spitest/start.s
@@ -0,0 +1,59 @@
+.section .init
+.global _start
+.type _start, @function
+# Entry point: set gp, clear the integer registers, set sp, then call main
+_start:
+	  # Initialize global pointer
+	.option push
+	.option norelax                 # relaxation is off while gp itself is being loaded
+	1:auipc gp, %pcrel_hi(__global_pointer$)
+	addi  gp, gp, %pcrel_lo(1b)
+	.option pop
+	# Clear x1-x31, skipping x3 (gp), which was just initialized above
+	li x1, 0
+	li x2, 0
+	li x4, 0
+	li x5, 0
+	li x6, 0
+	li x7, 0
+	li x8, 0
+	li x9, 0
+	li x10, 0
+	li x11, 0
+	li x12, 0
+	li x13, 0
+	li x14, 0
+	li x15, 0
+	li x16, 0
+	li x17, 0
+	li x18, 0
+	li x19, 0
+	li x20, 0
+	li x21, 0
+	li x22, 0
+	li x23, 0
+	li x24, 0
+	li x25, 0
+	li x26, 0
+	li x27, 0
+	li x28, 0
+	li x29, 0
+	li x30, 0
+	li x31, 0
+
+
+
+	# set the stack pointer to the top of memory - 8 bytes (pointer size)
+	li sp, 0x87FFFFF8
+
+	jal ra, main
+	jal ra, _halt                   # reached only if main returns
+
+.section .text
+.global _halt
+.type _halt, @function
+_halt:
+	li gp, 1        # gp = 1 and a0 = 0 are set up before the ecall; NOTE(review): presumably the testbench's end-of-test convention -- confirm
+	li a0, 0
+	ecall
+	j _halt         # loop back to _halt if execution continues past the ecall
diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
index 707b2d5f4..7fe5897fb 100755
--- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh
+++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
@@ -1,5 +1,7 @@
 #!/bin/sh
 # create test vectors for stand alone int
 
+mkdir -p IF_vectors   # -p: do not fail when IF_vectors is left over from an earlier run
 ./extract_testfloat_vectors.py
 ./extract_arch_vectors.py
+cp IF_vectors/*  ../vectors   # copy the generated vectors into ../vectors