From fda6305d1c581a08b147650b409c087a7c89937e Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Tue, 27 Aug 2024 17:07:35 -0700
Subject: [PATCH] began porting over divremsqrt

---
 bin/regression-wally-intdiv                   |  561 ++++++
 config/shared/config-shared.vh                |    4 +
 config/shared/parameter-defs.vh               |    2 +
 src/cvw.sv                                    |    2 +
 src/fpu/divremsqrt/arithrightshift.sv         |    9 +
 src/fpu/divremsqrt/divremsqrt.sv              |  111 ++
 src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv  |   73 +
 src/fpu/divremsqrt/divremsqrtearlyterm.sv     |   27 +
 .../divremsqrt/divremsqrtfdivsqrtpostproc.sv  |  116 ++
 .../divremsqrt/divremsqrtfdivsqrtpreproc.sv   |  250 +++
 src/fpu/divremsqrt/divremsqrtflags.sv         |  183 ++
 .../divremsqrt/divremsqrtintspecialcase.sv    |   15 +
 src/fpu/divremsqrt/divremsqrtlzc.sv           |   39 +
 src/fpu/divremsqrt/divremsqrtnormshift.sv     |   81 +
 src/fpu/divremsqrt/divremsqrtpostprocess.sv   |  177 ++
 src/fpu/divremsqrt/divremsqrtround.sv         |  267 +++
 src/fpu/divremsqrt/divremsqrtroundsign.sv     |   45 +
 .../divremsqrt/divremsqrtshiftcorrection.sv   |   94 +
 src/fpu/divremsqrt/divremsqrtspecialcase.sv   |  240 +++
 src/fpu/divremsqrt/drsu.sv                    |  102 +
 src/fpu/divremsqrt/intrightshift.sv           |   37 +
 testbench/testbench-fp.sv                     | 1682 +++++++++++++++++
 .../combined_IF_vectors/create_IF_vectors.sh  |    2 +
 23 files changed, 4119 insertions(+)
 create mode 100755 bin/regression-wally-intdiv
 create mode 100644 src/fpu/divremsqrt/arithrightshift.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrt.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtearlyterm.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtflags.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtintspecialcase.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtlzc.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtnormshift.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtpostprocess.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtround.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtroundsign.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtspecialcase.sv
 create mode 100644 src/fpu/divremsqrt/drsu.sv
 create mode 100644 src/fpu/divremsqrt/intrightshift.sv
 create mode 100644 testbench/testbench-fp.sv

diff --git a/bin/regression-wally-intdiv b/bin/regression-wally-intdiv
new file mode 100755
index 000000000..5efe0dc24
--- /dev/null
+++ b/bin/regression-wally-intdiv
@@ -0,0 +1,561 @@
+#!/usr/bin/python3
+##################################
+#
+# regression-wally
+# David_Harris@Hmc.edu 25 January 2021
+# Modified by Jarred Allen <jaallen@g.hmc.edu>
+#
+# Run a regression with multiple configurations in parallel and exit with
+# non-zero status code if an error happened, as well as printing human-readable
+# output.
+#
+##################################
+import sys,os,shutil
+import multiprocessing
+
+
+
+class bcolors:  # ANSI escape sequences used to color/style terminal output
+    HEADER = '\033[95m'     # bright magenta
+    OKBLUE = '\033[94m'     # bright blue
+    OKCYAN = '\033[96m'     # bright cyan
+    OKGREEN = '\033[92m'    # bright green; used for success messages
+    WARNING = '\033[93m'    # bright yellow
+    FAIL = '\033[91m'       # bright red; used for failure and timeout messages
+    ENDC = '\033[0m'        # reset all attributes; ends a colored span
+    BOLD = '\033[1m'        # bold
+    UNDERLINE = '\033[4m'   # underline
+
+from collections import namedtuple
+regressionDir = os.path.dirname(os.path.abspath(__file__))
+os.chdir(regressionDir)
+
+coverage = '-coverage' in sys.argv
+fp = '-fp' in sys.argv
+nightly = '-nightly' in sys.argv
+softfloat = '-softfloat' in sys.argv
+intdiv = '-intdiv' in sys.argv
+
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
+# name:     the name of this test (used in printing human-readable output and logfile names)
+# variant:  the configuration the test runs on (also used in output and logfile names)
+# cmd:      the command to run to test (should include the logfile as '{}', and
+#           the command needs to write to that file)
+# grepstr:  the pattern to grep through the log file for. The test succeeds iff
+#           grep finds it in the logfile (it is passed to grep, so it may be
+#           any pattern grep accepts, see `man 1 grep` for more info).
+
+# edit this list to add more test cases
+if nightly:
+    nightMode = "-nightly"
+    configs = []  # nightly runs start without the lint test case
+else:
+    nightMode = ""
+    configs = [
+        TestCase(
+            name="lints",
+            variant="all",
+            cmd="./lint-wally " + nightMode + " | tee {}",  # nightMode is "" on this branch
+            grepstr="lints run with no errors or warnings",
+        ),
+    ]
+
+def getBuildrootTC(boot):
+    """Return the rv64gc buildroot TestCase: boot=True greps the log for the login prompt, else for the INSTR_LIMIT instruction count."""
+    INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
+    if boot:
+        name = "buildrootboot"
+        BRcmd = "vsim > {} -c <<!\ndo wally.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
+        BRgrepstr = "WallyHostname login:"
+    else:
+        name = "buildroot"
+        BRgrepstr = str(INSTR_LIMIT)+" instructions"  # *** TODO: replace this with a search for the login prompt.
+        if coverage:  # module-level flag: '-coverage' was given on the command line
+            print("buildroot coverage")
+            BRcmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0 -coverage\n!"
+        else:
+            print("buildroot no coverage")
+            BRcmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot configOptions -GINSTR_LIMIT=" +str(INSTR_LIMIT) + " \n!"
+    return TestCase(name, variant="rv64gc", cmd=BRcmd, grepstr=BRgrepstr)
+
+tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
+
+tests64i = ["arch64i"]
+for test in tests64i:  # one rv64i test case per listed test suite
+    tc = TestCase(
+        name=test,
+        variant="rv64i",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv64i "+test+"\n!",
+        grepstr="All tests ran without failures")
+    configs.append(tc)
+
+tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
+tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32f_divsqrt", "arch32d_divsqrt", 
+             "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32a", "arch32zifencei", "arch32zicond", 
+             "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zfh", "arch32zfh_fma", 
+             "arch32zfh_divsqrt", "arch32zfaf", "wally32a", "wally32priv", "wally32periph", 
+             "arch32zbkb", "arch32zbkc", "arch32zbkx", "arch32zknd", "arch32zkne", "arch32zknh"]  # "arch32zbc", "arch32zfad",
+#tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32a", "arch32zifencei", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zicboz", "arch32zcb", "wally32a",  "wally32priv", "wally32periph"]  
+for test in tests32gc:  # one rv32gc test case per listed test suite
+    tc = TestCase(
+        name=test,
+        variant="rv32gc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc "+test+"\n!",
+        grepstr="All tests ran without failures")
+    configs.append(tc)
+
+tests32imcimperas = ["imperas32i", "imperas32c"] # unused
+tests32imc = ["arch32i", "arch32c", "arch32m", "wally32periph"]
+for test in tests32imc:  # one rv32imc test case per listed test suite
+    tc = TestCase(
+        name=test,
+        variant="rv32imc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32imc "+test+"\n!",
+        grepstr="All tests ran without failures")
+    configs.append(tc)
+
+tests32i = ["arch32i"]
+for test in tests32i:  # one rv32i test case per listed test suite
+    tc = TestCase(
+        name=test,
+        variant="rv32i",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32i "+test+"\n!",
+        grepstr="All tests ran without failures")
+    configs.append(tc)
+
+
+tests32e = ["arch32e"]
+for test in tests32e:  # one rv32e test case per listed test suite
+    tc = TestCase(
+        name=test,
+        variant="rv32e",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv32e "+test+"\n!",
+        grepstr="All tests ran without failures")
+    configs.append(tc)
+
+tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64f_divsqrt", "arch64d_divsqrt", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",  "arch64zfh", "arch64zfh_divsqrt", "arch64zfh_fma", "arch64zfaf", "arch64zfad", "arch64zbkb", "arch64zbkc", "arch64zbkx", "arch64zknd", "arch64zkne", "arch64zknh",
+             "arch64priv", "arch64c",  "arch64m", "arch64a", "arch64zifencei", "arch64zicond", "wally64a", "wally64periph", "wally64priv"] # add arch64zfh_fma when available; arch64zicobz, arch64zcb when working
+#tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", 
+#             "arch64priv", "arch64c",  "arch64m", "arch64a", "arch64zifencei", "wally64a", "wally64periph", "wally64priv", "arch64zicboz", "arch64zcb"] 
+# Coverage runs keep only rv64gc tests: every test case queued so far is
+# dropped and the rv64gc test list is replaced by the subset below.
+if coverage:
+    coverStr = '-coverage'
+    configs = []
+    tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c", "arch64m",
+                 "arch64zifencei", "arch64zicond", "arch64a", "wally64a", "wally64periph", "wally64priv",
+                 "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs"] # add when working: "arch64zcb", "arch64zicboz"
+    if fp:
+        # floating-point suites join the coverage run only when -fp is given
+        tests64gc.extend([
+            "arch64f", "arch64d", "arch64zfh",
+            "arch64f_fma", "arch64d_fma", "arch64zfh_fma",
+            "arch64f_divsqrt", "arch64d_divsqrt", "arch64zfh_divsqrt",
+            "arch64zfaf", "arch64zfad",
+        ])
+else:
+    coverStr = ''
+# one rv64gc test case per listed suite; coverStr is passed through to wally-batch.do
+for test in tests64gc:
+    tc = TestCase(
+        name=test,
+        variant="rv64gc",
+        cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
+        grepstr="All tests ran without failures",
+    )
+    configs.append(tc)
+
+# run derivative configurations if requested  
+if (nightly):
+    derivconfigtests = [
+        ["tlb2_rv32gc", ["wally32priv"]],
+        ["tlb16_rv32gc", ["wally32priv"]],
+        ["tlb2_rv64gc", ["wally64priv"]],
+        ["tlb16_rv64gc", ["wally64priv"]],
+        ["way_1_4096_512_rv32gc", ["arch32i"]],
+        ["way_2_4096_512_rv32gc", ["arch32i"]],
+        ["way_8_4096_512_rv32gc", ["arch32i"]],
+        ["way_4_2048_512_rv32gc", ["arch32i"]],
+        ["way_4_4096_256_rv32gc", ["arch32i"]],
+        ["way_1_4096_512_rv64gc", ["arch64i"]],
+        ["way_2_4096_512_rv64gc", ["arch64i"]],
+        ["way_8_4096_512_rv64gc", ["arch64i"]],
+        ["way_4_2048_512_rv64gc", ["arch64i"]],
+        ["way_4_4096_256_rv64gc", ["arch64i"]],
+        ["way_4_4096_1024_rv64gc", ["arch64i"]],
+
+        ["ram_0_0_rv64gc", ["ahb64"]],
+        ["ram_1_0_rv64gc", ["ahb64"]],
+        ["ram_1_1_rv64gc", ["ahb64"]],
+        ["ram_2_0_rv64gc", ["ahb64"]],
+        ["ram_2_1_rv64gc", ["ahb64"]],
+        
+        ["noicache_rv32gc", ["ahb32"]],
+# cacheless designs will not work until DTIM supports FLEN > XLEN
+#        ["nodcache_rv32gc", ["ahb32"]],
+#        ["nocache_rv32gc", ["ahb32"]],
+        ["noicache_rv64gc", ["ahb64"]],
+        ["nodcache_rv64gc", ["ahb64"]],
+        ["nocache_rv64gc", ["ahb64"]],
+
+        ### add misaligned tests
+
+        ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
+        ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+        ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
+
+        ### branch predictor simulation
+
+        # ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],        
+        # ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # # btb
+        # ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+        # # ras
+        # ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+        # ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
+
+#  enable floating-point tests when lint is fixed
+        ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
+        ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
+        ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
+        ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
+        ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
+        ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
+        ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed
+        ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
+        ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
+        ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
+        
+
+    ]
+    # Each entry is [config, [tests...]], optionally followed by the pair
+    # "configOptions", "<options>"; the options string is appended to the
+    # wally-batch.do command line after the literal word configOptions.
+    for test in derivconfigtests:
+        config, tests = test[0], test[1]
+        configOptions = test[3] if (len(test) >= 4 and test[2] == "configOptions") else ""
+        cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
+        for t in tests:
+            tc = TestCase(
+                name=t,
+                variant=config,
+                cmd=cmdPrefix+" "+t+" configOptions "+configOptions+"\n!",
+                grepstr="All tests ran without failures",
+            )
+            # queued behind everything already in configs
+            configs.append(tc)
+
+
+
+
+# softfloat tests
+if (softfloat):
+    configs = []
+    softfloatconfigs = [
+    "fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
+    "fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
+    "fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
+    "fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
+    "fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
+    "fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
+    "fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
+    "fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
+    "fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
+    "fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
+    "fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
+    "fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
+    "fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
+    "fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
+    "fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
+    "fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
+    "fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
+    "fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
+    "fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
+    "fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
+    "f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
+    "f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
+    "f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
+    "f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
+    ]
+    for config in softfloatconfigs:
+        # div and sqrt test cases are inserted at the front of configs,
+        # ahead of every test case queued so far
+        divtest = TestCase(
+            name="div",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " div \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.insert(0, divtest)
+
+        sqrttest = TestCase(
+            name="sqrt",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " sqrt \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.insert(0, sqrttest)
+
+        # conversion tests are only queued for configs whose name contains
+        # "ieee"; every name in softfloatconfigs currently does
+        if "ieee" not in config:
+            continue
+        # cvtint test case
+        cvtinttest = TestCase(
+            name="cvtint",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " cvtint \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.append(cvtinttest)
+
+        # cvtfp test case
+        # WILL fail on F_only (refer to spec)
+        cvtfptest = TestCase(
+            name="cvtfp",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " cvtfp \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.append(cvtfptest)
+      
+# intdiv verification
+if (intdiv):
+    configs = []
+    # ***NOTE add to this
+    
+    intdivconfigs = [
+    "fdh_ieee_div_2_1i_rv32gc", "fdh_ieee_div_2_1i_rv64gc", "fdh_ieee_div_2_2i_rv32gc",
+    "fdh_ieee_div_2_2i_rv64gc", "fdh_ieee_div_2_4i_rv32gc", "fdh_ieee_div_2_4i_rv64gc",
+    "fdh_ieee_div_4_1i_rv32gc", "fdh_ieee_div_4_1i_rv64gc", "fdh_ieee_div_4_2i_rv32gc",
+    "fdh_ieee_div_4_2i_rv64gc", "fdh_ieee_div_4_4i_rv32gc", "fdh_ieee_div_4_4i_rv64gc",
+    "fd_ieee_div_2_1i_rv32gc", "fd_ieee_div_2_1i_rv64gc", "fd_ieee_div_2_2i_rv32gc",
+    "fd_ieee_div_2_2i_rv64gc", "fd_ieee_div_2_4i_rv32gc", "fd_ieee_div_2_4i_rv64gc",
+    "fd_ieee_div_4_1i_rv32gc", "fd_ieee_div_4_1i_rv64gc", "fd_ieee_div_4_2i_rv32gc",
+    "fd_ieee_div_4_2i_rv64gc", "fd_ieee_div_4_4i_rv32gc", "fd_ieee_div_4_4i_rv64gc",
+    "fdqh_ieee_div_2_1i_rv32gc", "fdqh_ieee_div_2_1i_rv64gc", "fdqh_ieee_div_2_2i_rv32gc",
+    "fdqh_ieee_div_2_2i_rv64gc", "fdqh_ieee_div_2_4i_rv32gc", "fdqh_ieee_div_2_4i_rv64gc",
+    "fdqh_ieee_div_4_1i_rv32gc", "fdqh_ieee_div_4_1i_rv64gc", "fdqh_ieee_div_4_2i_rv32gc",
+    "fdqh_ieee_div_4_2i_rv64gc", "fdqh_ieee_div_4_4i_rv32gc", "fdqh_ieee_div_4_4i_rv64gc",
+    "fdq_ieee_div_2_1i_rv32gc", "fdq_ieee_div_2_1i_rv64gc", "fdq_ieee_div_2_2i_rv32gc",
+    "fdq_ieee_div_2_2i_rv64gc", "fdq_ieee_div_2_4i_rv32gc", "fdq_ieee_div_2_4i_rv64gc",
+    "fdq_ieee_div_4_1i_rv32gc", "fdq_ieee_div_4_1i_rv64gc", "fdq_ieee_div_4_2i_rv32gc",
+    "fdq_ieee_div_4_2i_rv64gc", "fdq_ieee_div_4_4i_rv32gc", "fdq_ieee_div_4_4i_rv64gc",
+    "fh_ieee_div_2_1i_rv32gc", "fh_ieee_div_2_1i_rv64gc", "fh_ieee_div_2_2i_rv32gc",
+    "fh_ieee_div_2_2i_rv64gc", "fh_ieee_div_2_4i_rv32gc", "fh_ieee_div_2_4i_rv64gc",
+    "fh_ieee_div_4_1i_rv32gc", "fh_ieee_div_4_1i_rv64gc", "fh_ieee_div_4_2i_rv32gc",
+    "fh_ieee_div_4_2i_rv64gc", "fh_ieee_div_4_4i_rv32gc", "fh_ieee_div_4_4i_rv64gc",
+    "f_ieee_div_2_1i_rv32gc", "f_ieee_div_2_1i_rv64gc", "f_ieee_div_2_2i_rv32gc",
+    "f_ieee_div_2_2i_rv64gc", "f_ieee_div_2_4i_rv32gc", "f_ieee_div_2_4i_rv64gc",
+    "f_ieee_div_4_1i_rv32gc", "f_ieee_div_4_1i_rv64gc", "f_ieee_div_4_2i_rv32gc",
+    "f_ieee_div_4_2i_rv64gc", "f_ieee_div_4_4i_rv32gc", "f_ieee_div_4_4i_rv64gc",
+    "fd_ieee_div_2_8i_rv32gc",
+    "fd_ieee_div_2_8i_rv64gc",
+    "fdq_ieee_div_2_8i_rv64gc",
+    "fdq_ieee_div_2_8i_rv32gc",
+    "f_ieee_div_2_8i_rv64gc",
+    "f_ieee_div_2_8i_rv32gc"
+    ]
+    nointdivconfigs = [
+    "fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
+    "fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
+    "fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
+    "fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
+    "fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
+    "fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
+    "fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
+    "fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
+    "fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
+    "fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
+    "fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
+    "fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
+    "fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
+    "fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
+    "fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
+    "fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
+    "fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
+    "fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
+    "fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
+    "fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
+    "f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
+    "f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
+    "f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
+    "f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
+    ]
+
+    for config in intdivconfigs:
+        # fdivremsqrt test case, inserted at the front of configs
+        fdivremsqrttestcase = TestCase(
+            name="fdivremsqrt",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " fdivremsqrt \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.insert(0, fdivremsqrttestcase)
+    for config in nointdivconfigs:
+        # div,sqrt test cases for no integer flavor of divider
+        divtestcase = TestCase(
+            name="fdiv",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " div_drsu \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.insert(0, divtestcase)
+        sqrttestcase = TestCase(
+            name="fsqrt",
+            variant=config,
+            cmd="vsim > {} -c  <<!\ndo testfloat-batch.do " + config + " sqrt_drsu \n!",
+            grepstr="All Tests completed with          0 errors",
+        )
+        configs.insert(0, sqrttestcase)
+
+import os
+from multiprocessing import Pool, TimeoutError
+
+def search_log_for_text(text, logfile):
+    """Search through the given log file for text (a grep pattern), returning True if it is found or False if it is not"""
+    import subprocess  # local import; an argument list (no shell) keeps quotes in text or logfile from breaking the command
+    return subprocess.run(["grep", "-e", text, logfile], stdout=subprocess.DEVNULL).returncode == 0
+
+def run_test_case(config):
+    """Run the given test case, and return 0 if the test succeeds and 1 if it fails"""
+    label = config.variant+"_"+config.name
+    logname = "logs/"+label+".log"
+    cmd = config.cmd.format(logname)  # fill the '{}' placeholder with the log file name
+    # the command is run through the shell from the regression directory
+    os.chdir(regressionDir)
+    os.system(cmd)
+    if not search_log_for_text(config.grepstr, logname):
+        print(f"{bcolors.FAIL}{label}: Failures detected in output{bcolors.ENDC}")
+        print(f"  Check {logname}")
+        return 1
+    print(f"{bcolors.OKGREEN}{label}: Success{bcolors.ENDC}")
+    return 0
+
+def main():
+    """Run the selected regression tests in parallel and count the failures.
+
+    Flags in sys.argv choose extra test cases and the per-test timeout.
+    Returns the number of test cases that failed or timed out.
+    """
+    global configs, coverage
+    os.chdir(regressionDir)
+    # Keep logs/ from earlier runs but always start from an empty wkdir/.
+    # exist_ok / ignore_errors replace bare `except: pass` blocks, which also
+    # swallowed KeyboardInterrupt and SystemExit.
+    os.makedirs("logs", exist_ok=True)
+    shutil.rmtree("wkdir", ignore_errors=True)
+    os.mkdir("wkdir")
+
+    if '-makeTests' in sys.argv:
+        os.chdir(regressionDir)
+        os.system('./make-tests.sh | tee ./logs/make-tests.log')
+
+    if '-all' in sys.argv:
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs.append(getBuildrootTC(boot=True))
+    elif '-buildroot' in sys.argv:
+        TIMEOUT_DUR = 30*7200 # seconds
+        configs = [getBuildrootTC(boot=True)]
+    elif '-coverage' in sys.argv:
+        TIMEOUT_DUR = 20*60 # seconds
+        # Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage.
+        # Also it is slow to run.
+        #    configs.append(getBuildrootTC(boot=False))
+        os.system('rm -f cov/*.ucdb')
+    elif '-nightly' in sys.argv:
+        TIMEOUT_DUR = 60*1440 # 1 day
+        configs.append(getBuildrootTC(boot=False))
+    elif '-softfloat' in sys.argv:
+        TIMEOUT_DUR = 60*60 # seconds
+    elif '-intdiv' in sys.argv:
+        TIMEOUT_DUR = 60*60 # seconds
+    else:
+        TIMEOUT_DUR = 10*60 # seconds
+        configs.append(getBuildrootTC(boot=False))
+
+    # Scale the number of concurrent processes to the number of test cases, but
+    # max out at the CPU count to not overwhelm the system (Pool needs at least 1)
+    with Pool(processes=max(1, min(len(configs), multiprocessing.cpu_count()))) as pool:
+        num_fail = 0
+        results = {}
+        for config in configs:
+            results[config] = pool.apply_async(run_test_case, (config,))
+        for (config, result) in results.items():
+            try:
+                num_fail += result.get(timeout=TIMEOUT_DUR)
+            except TimeoutError:
+                num_fail += 1
+                print(f"{bcolors.FAIL}{config.variant}_{config.name}: Timeout - runtime exceeded {TIMEOUT_DUR} seconds{bcolors.ENDC}")
+
+    # Coverage report
+    if coverage:
+        os.system('make coverage')
+    # Count the number of failures
+    if num_fail:
+        print(f"{bcolors.FAIL}Regression failed with {num_fail} failed configurations{bcolors.ENDC}")
+    else:
+        print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
+    return num_fail
+
+if __name__ == '__main__':
+    sys.exit(min(main(), 255))  # exit statuses wrap modulo 256 (256 failures would report 0), so cap the count
diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh
index 91e1d4100..445dc392f 100644
--- a/config/shared/config-shared.vh
+++ b/config/shared/config-shared.vh
@@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN
 
 localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));                  // log_2(NORMSHIFTSZ)
 
+localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF));
+localparam NORMSHIFTSZDRSU = DIVb+1+NF;
+localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU);
+
 // Disable spurious Verilator warnings
 
 /* verilator lint_off STMTDLY */
diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh
index c80b00232..bb036c94d 100644
--- a/config/shared/parameter-defs.vh
+++ b/config/shared/parameter-defs.vh
@@ -194,6 +194,8 @@ localparam cvw_t P = '{
   FMALEN : FMALEN,
   NORMSHIFTSZ : NORMSHIFTSZ,
   LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
+  NORMSHIFTSZDRSU : NORMSHIFTSZDRSU,
+  LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU,
   LOGR        : LOGR,
   RK          : RK,
   FPDUR       : FPDUR,
diff --git a/src/cvw.sv b/src/cvw.sv
index ed0493484..94006274b 100644
--- a/src/cvw.sv
+++ b/src/cvw.sv
@@ -285,6 +285,8 @@ typedef struct packed {
   int LOGCVTLEN;
   int NORMSHIFTSZ;
   int LOGNORMSHIFTSZ;
+  int NORMSHIFTSZDRSU;
+  int LOGNORMSHIFTSZDRSU;
   int FMALEN;
 
 // division constants
diff --git a/src/fpu/divremsqrt/arithrightshift.sv b/src/fpu/divremsqrt/arithrightshift.sv
new file mode 100644
index 000000000..624a54751
--- /dev/null
+++ b/src/fpu/divremsqrt/arithrightshift.sv
@@ -0,0 +1,9 @@
+
+// Sign-preserving right shift of a signed (P.INTDIVb+4)-bit value by P.LOGR bit positions.
+module arithrightshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic signed [P.INTDIVb+3:0] shiftin,  // signed value to be shifted
+  output logic signed [P.INTDIVb+3:0] shifted   // shiftin >>> P.LOGR
+);
+  assign shifted = shiftin >>> P.LOGR; // shiftin is declared signed, so >>> fills with its sign bit
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
new file mode 100644
index 000000000..f57a3f8dd
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -0,0 +1,111 @@
+///////////////////////////////////////////
+// divremsqrt.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+ module divremsqrt import cvw::*;  #(parameter cvw_t P) (
+  input  logic                clk, 
+  input  logic                reset, 
+  input  logic [P.FMTBITS-1:0] FmtE,                 // format selector, forwarded to the preprocessor
+  input  logic                XsE,                  // forwarded to the FSM
+  input  logic [P.NF:0]        XmE, YmE,             // floating-point significands
+  input  logic [P.NE-1:0]      XeE, YeE,             // floating-point exponents
+  input  logic                XInfE, YInfE, 
+  input  logic                XZeroE, YZeroE, 
+  input  logic                XNaNE, YNaNE, 
+  input  logic                FDivStartE, IDivStartE,
+  input  logic                StallM,
+  input  logic                FlushE,
+  input  logic                SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,     // Funct3M[1] is used as the remainder-op select for the postprocessor
+  input  logic                IntDivE, W64E,
+  output logic                DivStickyM,
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.NE+1:0]      UeM,                  // biased exponent of result (from the preprocessor)
+  output logic [P.DIVb:0]      UmM,                  // U1.DIVb result significand (from the postprocessor)
+  output logic [P.XLEN-1:0]    FIntDivResultM,       // integer result from the postprocessor
+  output logic                 IntDivM,
+  // integer normalization shifter signals
+  output logic [P.INTDIVb+3:0]          PreResultM,
+  input logic [P.XLEN-1:0]          PreIntResultM,
+  output logic [P.DIVBLEN-1:0]       IntNormShiftM
+
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       Firstun;                      // Quotient selection
+  logic                       WZeroE;                       // Early termination flag
+  logic [P.DURLEN:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                       DivStartE;                    // Enable signal for flops during stall
+                                                            
+  // Integer div/rem signals                                
+  logic                       BZeroM;                       // Denominator is zero
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic                       NegQuotM, ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM;   // Special handling for postprocessor
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+  // NOTE(review): DivStartE, nM, mM and NegQuotM are declared above but not connected to any instance below.
+  // NOTE(review): fdivsqrtfsm and fdivsqrtiter keep their un-prefixed names; presumably the existing fdivsqrt modules are reused - confirm.
+  divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc(                          // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
+    // Int-specific 
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .AM, 
+    .IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);
+
+  fdivsqrtfsm #(P) fdivsqrtfsm(                                  // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific 
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter #(P) fdivsqrtiter(                                // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
+    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
+
+  divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc(                        // Postprocessor
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
+    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
+    .UmM, .WZeroE, .DivStickyM, 
+    // Int-specific 
+    .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, 
+    .FIntDivResultM,  .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);
+  
+  
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
new file mode 100644
index 000000000..640735bef
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
@@ -0,0 +1,73 @@
+///////////////////////////////////////////
+// divremsqrtdivshiftcalc.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Division shift calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtdivshiftcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NF+2:0]              DivUm,              // divsqrt significand
+  input  logic [P.NE+1:0]              DivUe,              // divsqrt exponent
+  output logic [P.LOGNORMSHIFTSZDRSU-1:0]  DivShiftAmt,        // divsqrt shift amount
+  output logic [P.NORMSHIFTSZDRSU-1:0]     DivShiftIn,         // divsqrt shift input
+  output logic                         DivResSubnorm,      // is the divsqrt result subnormal
+  output logic                         DivSubnormShiftPos  // is the subnormal shift amount positive
+);
+  // Computes the shift amount and the pre-shifted shifter input used to normalize the divsqrt result
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]         NormShift;          // normalized result shift amount
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]         DivSubnormShiftAmt; // subnormal result shift amount (killed if negative)
+  logic [P.NE+1:0]                     DivSubnormShift;    // subnormal result shift amount
+
+  // is the result subnormal
+  // the result is treated as subnormal when DivUe's top bit is set (negative exponent) or DivUe is all zeros
+  assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
+
+  // if the result is subnormal
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
+  //  .00xxxxxxxxxxxxx... << DivUe+NF+1       Exp = +1
+  //  .0000xxxxxxxxxxx... >> 1                Exp = 1
+  // Left shift amount      = DivUe+NF+1-1
+  assign DivSubnormShift    = (P.NE+2)'(P.NF)+DivUe;
+  assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];  // top bit clear: shift amount is zero or positive
+
+  // if the result is normalized
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
+  //  00000000.xxxxxxx... << NF               Exp = DivUe+1
+  //  00000000x.xxxxxx... << NF               Exp = DivUe (extra shift done afterwards)
+  //  00000000xx.xxxxx... << 1?               Exp = DivUe-1 (determined after)
+  // initial Left shift amount  = NF
+  // NOTE(review): an extra shift for minimally redundant radix 4 was described here, but NormShift below is the constant NF
+  assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF);
+
+  // if the shift amount is negative then don't shift (keep sticky bit)
+  // NOTE(review): an earlier note mentioned scaling an early termination shift by LOGR*DIVCOPIES; no such scaling is done below
+  assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0;
+  assign DivShiftAmt        = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
+
+  // pre-shift the divider result for normalization: NF zeros, DivUm (NF+3 bits), then zero padding up to NORMSHIFTSZDRSU bits
+  assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}};
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtearlyterm.sv b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
new file mode 100644
index 000000000..3d9715ed4
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
@@ -0,0 +1,27 @@
+module divremsqrtearlyterm import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.DIVb+3:0]    WS, WC,            // Q4.DIVb
+  input  logic [P.DIVb+3:0]    D,                 // Q4.DIVb
+  input  logic [P.DIVb:0]      FirstUM,   // U1.DIVb
+  input  logic [P.DIVb+1:0]    FirstC,            // Q2.DIVb
+  input  logic                 Firstun, SqrtE,    // NOTE(review): Firstun is not referenced in this module
+  output logic                 WZeroE             // weq0E, or for radix 2 weq0E|wfeq0E
+);
+  logic weq0E;                                    // output of the aplusbeq0 check on WS and WC
+  aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E); // presumably flags WS + WC == 0 - confirm against aplusbeq0
+  if (P.RADIX == 2) begin: R2EarlyTerm
+    logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
+    logic [P.DIVb+2:0] FirstK;
+    logic wfeq0E;
+    logic [P.DIVb+3:0] WCF, WSF;
+
+    assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));     // keeps only bits of {1,FirstC} whose lower neighbor is 0
+    assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0};    // F for square root
+    assign FZeroDivE =  D << 1;                                    // F for divide
+    mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE); // choose divide or square root F using SqrtE
+    csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
+    aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);         // same check applied to the F-adjusted pair
+    assign WZeroE = weq0E|wfeq0E;
+  end else begin
+    assign WZeroE = weq0E;                                         // other radices: only the WS, WC check
+  end 
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
new file mode 100644
index 000000000..e1c152227
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
@@ -0,0 +1,116 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtpostproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk, reset,
+  input  logic                 StallM,
+  input  logic [P.DIVb+3:0]    WS, WC,            // Q4.DIVb
+  input  logic [P.DIVb+3:0]    D,                 // Q4.DIVb
+  input  logic [P.DIVb:0]      FirstU, FirstUM,   // U1.DIVb
+  input  logic [P.DIVb+1:0]    FirstC,            // Q2.DIVb
+  input  logic                 SqrtE,
+  input  logic                 Firstun, SqrtM, SpecialCaseM, 
+  input  logic [P.XLEN-1:0]    AM,                // U/Q(XLEN.0)
+  input  logic                 RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM,
+  input  logic [P.DIVBLEN-1:0] IntNormShiftM,
+  input  logic [P.XLEN-1:0]    PreIntResultM,
+  output logic [P.DIVb:0]      UmM,               // U1.DIVb result significand
+  output logic                 WZeroE,
+  output logic                 DivStickyM,
+  output logic [P.XLEN-1:0]    FIntDivResultM,     // U/Q(XLEN.0)
+  output logic [P.INTDIVb+3:0]    PreResultM
+
+);
+  
+  logic [P.DIVb+3:0]         Sum;
+  logic [P.INTDIVb+3:0]         W;
+  logic [P.DIVb:0]           PreUmM;
+  logic                      NegStickyM;
+  logic                      weq0E, WZeroM;        // NOTE(review): weq0E is declared but not used in this module
+  logic [P.XLEN-1:0]         IntDivResultM;
+  logic                      NegQuotM; // Integer quotient is negative
+
+  //////////////////////////
+  // Execute Stage: Detect early termination for an exact result
+  //////////////////////////
+
+  // check for early termination on an exact result. 
+  divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS,.Firstun, .WZeroE);
+  
+
+  //////////////////////////
+  // E/M Pipeline register
+  //////////////////////////
+ 
+  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
+
+  //////////////////////////
+  // Memory Stage: Postprocessing
+  //////////////////////////
+
+  //  If the result is not exact, the sticky should be set
+  assign DivStickyM = ~WZeroM & ~SpecialCaseM; 
+
+  // Determine if sticky bit is negative *** Full sum only needed for Integer
+  assign Sum = WC + WS;
+  assign NegStickyM = Sum[P.DIVb+3];
+  mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
+  mux2 #(P.DIVb+1)    ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);   // SqrtM chooses between PreUmM and PreUmM shifted left by 1
+
+   // Integer quotient or remainder correction, normalization, and special cases
+  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
+    logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
+    logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM; // NOTE(review): PreResultM/PreIntResultM redeclared here hide the module ports of the same names - confirm intended
+    logic [P.INTDIVb+3:0] DTrunc, SumTrunc;
+
+    assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];   // upper INTDIVb+4 bits of Sum
+    assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];       // upper INTDIVb+4 bits of D
+    arithrightshift #(P) rshift(SumTrunc, W);           // W = SumTrunc arithmetically shifted right by P.LOGR
+
+    assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
+
+    // Integer remainder: sticky and sign correction muxes
+    assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
+    mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);
+
+    // Select quotient or remainder and do normalization shift
+    mux2 #(P.INTDIVb+4)    presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
+    intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
+    mux2 #(P.INTDIVb+4)    preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM);
+
+    divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,PreIntResultM,IntDivResultM);
+    // sign extend result for W64
+    if (P.XLEN==64) begin
+      mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0], 
+        {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
+        W64M, FIntDivResultM);
+    end else 
+      assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
+  end // NOTE(review): there is no else branch, so FIntDivResultM has no driver when P.IDIV_ON_FPU is 0
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
new file mode 100644
index 000000000..61f9931ab
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
@@ -0,0 +1,250 @@
+///////////////////////////////////////////
+// divremsqrtfdivsqrtpreproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 IFDivStartE, 
+  input  logic [P.NF:0]        Xm, Ym,      // Floating-point significands
+  input  logic [P.NE-1:0]      Xe, Ye,      // Floating-point exponents
+  input  logic [P.FMTBITS-1:0] FmtE,
+  input  logic                 SqrtE,
+  input  logic                 XZeroE,
+  input  logic [2:0]           Funct3E,
+  output logic [P.NE+1:0]      UeM,         // biased exponent of result
+  output logic [P.DIVb+3:0]    X, D,        // Q4.DIVb
+  // Int-specific
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU 
+  input  logic                 IntDivE, W64E,
+  // Outputs
+  output logic                 ISpecialCaseE,
+  output logic [P.DURLEN:0]  CyclesE,
+  output logic [P.DIVBLEN-1:0] IntNormShiftM,
+  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM, // NOTE(review): SIGNOVERFLOWM and ZeroDiffM are never assigned in this module
+  output logic                 AsM, BsM, BZeroM,
+  output logic [P.XLEN-1:0]    AM
+);
+  // Normalizes FP significands or integer operands, forms the initial X and registered D for the iterator, and registers integer side-band signals
+  logic [P.DIVb:0]             Xnorm, Dnorm;
+  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
+  logic [P.NE+1:0]             UeE;                                 // Result Exponent (FP only)
+  logic [P.DIVb:0]             IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
+  logic [P.DIVBLEN-1:0]        mE, ell;                             // Leading zeros of inputs
+  logic [P.DIVBLEN-1:0]        IntResultBitsE;                      // bits in integer result
+  logic                        NumerZeroE;                          // Numerator is zero (X or A)
+  logic                        SIGNOVERFLOWE;
+  logic                        AZeroE, BZeroE;                      // A or B is Zero for integer division
+  logic                        SignedDivE;                          // signed division
+  logic                        AsE, BsE;                            // Signs of integer inputs
+  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic                        ALTBE;
+  logic                        EvenExp;
+
+  logic [$clog2(P.RK):0] RightShiftX;
+  logic [P.DIVBLEN-1:0] ZeroDiff, p;
+
+
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
+
+  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
+    logic [P.XLEN-1:0] BE, PosA, PosB;
+
+    // Extract inputs, signs, zero, depending on W64 mode if applicable
+    assign SignedDivE = ~Funct3E[0];
+  
+    // Source handling
+    if (P.XLEN==64) begin // 64-bit, supports W64
+      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
+    end else begin // 32 bits only
+      assign AE = ForwardedSrcAE;
+      assign BE = ForwardedSrcBE;
+     end
+    assign AZeroE = ~(|AE);
+    assign BZeroE = ~(|BE);
+    assign AsE = AE[P.XLEN-1] & SignedDivE;
+    assign BsE = BE[P.XLEN-1] & SignedDivE; 
+
+    // Force integer inputs to be positive
+    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
+    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
+
+    // Select integer or floating point inputs
+    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
+    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
+    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+  end else begin // Int not supported
+    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
+    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
+    assign NumerZeroE = XZeroE;
+  end
+
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
+  // count leading zeros for Subnorm FP and to normalize integer inputs
+  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
+  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);
+
+  // Normalization shift: shift leading one into most significant bit
+  assign Xnorm = (IFX << ell);
+  assign Dnorm = (IFD << mE); 
+
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  //  Determine DivXShifted (X shifted to digit boundary)
+  //  and nE (number of fractional digits)
+  //////////////////////////////////////////////////////
+
+  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
+
+  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
+
+    // calculate number of result bits
+    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
+    assign ALTBE = ZeroDiff[P.DIVBLEN-1];  // A less than B (A has more leading zeros)
+    assign SIGNOVERFLOWE = 1'b0;
+
+    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);          
+
+    /* verilator lint_off WIDTH */
+    assign IntResultBitsE = P.LOGR + p;  // Total number of result bits (r integer bits plus p fractional bits)
+   
+    /* verilator lint_on WIDTH */
+
+    // Integer special cases (terminate immediately)
+    assign ISpecialCaseE = BZeroE | ALTBE;
+
+    // calculate right shift amount RightShiftX to complete in discrete number of steps
+    if (P.RK > 1) begin // more than 1 bit per cycle
+      
+      /* verilator lint_off WIDTH */
+      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
+      assign DivXShifted = DivX >> RightShiftX;                     // shift X by up to R*K-1 to complete in n steps
+      /* verilator lint_on WIDTH */
+    end else begin // radix 2 1 copy doesn't require shifting
+      assign DivXShifted = DivX;
+      assign RightShiftX = 0;
+    end
+  end else begin
+    assign ISpecialCaseE = 0;
+  end
+
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // Extend to Q4.b format
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
+   //////////////////////////////////////////////////////
+
+
+  // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
+  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
+  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
+  // Now (X-1) is negative.  Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
+  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
+  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
+  // Subtracting 2 is equivalent to adding 1110.  Subtracting 4 is equivalent to adding 1100.  Prepend leading 1s to do a free subtraction.
+  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
+  // Radix      Exponent odd          Exponent Even
+  // 2          x-2 = 2(x/2 - 1)      x/2 - 2 = 2(x/4 - 1)
+  // 4          2(x)-4 = 4(x/2 - 1))  2(x/2)-4 = 4(x/4 - 1)
+  // Summary: PreSqrtX = r(x/2or4 - 1)
+
+  logic [P.DIVb:0] PreSqrtX; // NOTE(review): only referenced inside the commented-out code below
+  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
+  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
+
+/*  
+  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
+  // This saves one bit in DIVb because there is no initial right shift.
+  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
+  // That is an optimization for another day.
+  if (P.RADIX == 2) begin
+    logic [P.DIVb:0] PreSqrtX;    // U1.DIVb
+    mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
+    assign SqrtX = {3'b111, PreSqrtX};                          // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
+  end else begin
+    logic [P.DIVb+1:0] PreSqrtX;  // U2.DIVb
+    mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
+    assign SqrtX = {2'b11, PreSqrtX};                     // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
+  end
+*/
+
+  // Initialize X for division or square root
+  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);                    
+
+  //////////////////////////////////////////////////////
+  // Select integer or floating-point operands
+  //////////////////////////////////////////////////////
+ if (P.IDIV_ON_FPU) begin
+    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
+
+  // Divisor register
+  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
+ 
+  // Floating-point exponent
+  fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
+  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
+
+  // Number of FSM cycles (to FSM)
+  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
+
+  if (P.IDIV_ON_FPU) begin:intpipelineregs
+    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
+    logic               RemOpE;
+
+    /* verilator lint_off WIDTH */
+    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  rn = Cycles * r * k - r ***explain
+    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
+    /* verilator lint_on WIDTH */
+    assign RemOpE = Funct3E[1];
+    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
+
+    // pipeline registers
+    flopen #(1)          mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
+    flopen #(1)         altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
+    flopen #(1)        bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
+    flopen #(1)        asignreg(clk, IFDivStartE, AsE,      AsM);
+    flopen #(1)        bsignreg(clk, IFDivStartE, BsE,      BsM);
+    flopen #(P.DIVBLEN)   nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); 
+    flopen #(P.XLEN)    srcareg(clk, IFDivStartE, AE,       AM);
+    if (P.XLEN==64) 
+      flopen #(1)        w64reg(clk, IFDivStartE, W64E,     W64M);
+  end
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..dc480637b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,183 @@
+
+///////////////////////////////////////////
+// divremsqrtflags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtflags import cvw::*;  #(parameter cvw_t P) (
+  input  logic                Xs,                     // X sign
+  input  logic [P.FMTBITS-1:0] OutFmt,                 // output format
+  input  logic                InfIn,                  // is an Inf input being used
+  input  logic                XInf, YInf,             // inputs are infinity
+  input  logic                NaNIn,                  // is a NaN input being used
+  input  logic                XSNaN, YSNaN,           // inputs are signaling NaNs
+  input  logic                XZero, YZero,           // inputs are zero
+  input  logic [P.NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                     // normalized exponent (compared against 0 in the underflow check)
+  // rounding
+  input  logic                Plus1,                  // do you add one for rounding (not referenced in this module)
+  input  logic                Round, Guard, Sticky,   // bits used to determine rounding
+  input  logic                UfPlus1,                // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                DivOp,                  // divsqrt operation? (gates DivInvalid in the Invalid flag and DivByZero)
+  input  logic                Sqrt,                   // Sqrt?
+  // flags
+  output logic                DivByZero,              // divide by zero flag
+  output logic                Overflow,               // overflow flag to select result
+  output logic                Invalid,                // invalid flag to select the result
+  output logic [4:0]          PostProcFlg             // flags: {Invalid, DivByZero, Overflow, Underflow, Inexact}
+);
+
+  logic               SigNaN;         // is an input a signaling NaN
+  logic               Inexact;        // final inexact flag
+  logic               FpInexact;      // floating point inexact flag
+  logic               DivInvalid;     // division/sqrt invalid-operation condition
+  logic               Underflow;      // Underflow flag
+  logic               ResExpGteMax;   // is the result greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent or 
+  // the shift amount is greater than the integer's size (for cvt to int)
+  // ShiftGtIntSz calculation (NOTE(review): no ShiftGtIntSz signal exists in this module - stale description?):
+  //      a left shift of intlen+1 is still in range but any more than that is an overflow
+  //              initial: |      64 0's         |    XLEN     |
+  //                      |      64 0's         |    XLEN     | << 64
+  //                      |      XLEN           |    00000... |
+  //      65 = ...0 0 0 0   0 1 0 0   0 0 0 1
+  //          |     or      | |     or      |
+  //      33 = ...0 0 0 0   0 0 1 0   0 0 0 1
+  //          |     or        | |     or    |
+  //      larger or equal if:
+  //          - any of the bits after the most significant 1 is one
+  //          - the most significant in 65 or 33 is still a one in the number and
+  //            one of the later bits is one
+  if (P.FPSIZES == 1) begin
+      assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];   // all NE exponent bits set, or bit NE set
+
+  end else if (P.FPSIZES == 2) begin    
+      assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)   // per format: all exponent bits of that format set, or any bit above them (up to bit NE) set
+              P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+              P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+              P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
+              default: ResExpGteMax = 1'bx;
+          endcase
+
+  end else if (P.FPSIZES == 4) begin        
+      always_comb
+          case (OutFmt)   // NOTE(review): unlike the FPSIZES == 3 case above, this case has no default branch
+              P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+              P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+              P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+              P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
+          endcase
+  end
+
+
+  // calculate overflow flag:
+  //                 if the result is greater than or equal to the max exponent (not taking into account sign)
+  //                 |           and the exponent isn't negative
+  //                 |           |                   and there is no Inf/NaN input and no divide by zero
+  //                 |           |                   |            
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //                  the exponent is negative
+  //                  |                    the result is subnormal
+  //                  |                    |                    the result is normal and rounded from a Subnorm
+  //                  |                    |                    |                                      and if given an unbounded exponent the result does not round
+  //                  |                    |                    |                                      |                     and if the result is not exact
+  //                  |                    |                    |                                      |                     |               and no Inf/NaN input, divide by zero, or invalid
+  //                  |                    |                    |                                      |                     |               |
+  //assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - Don't set the underflow flag if an underflowed res isn't output
+  //assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);  // suppressed for Inf/NaN inputs, divide by zero, invalid, or zero X
+
+  // No integer-result inexact term is computed in this module;
+  // the floating-point inexact flag is used directly as the final flag.
+  //
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) any input is a signaling NaN
+  //   2) Inf / Inf or 0 / 0 (division, i.e. ~Sqrt, with DivOp set)
+  //   3) sqrt with negative X when X is nonzero and no input is NaN (with DivOp set)
+
+  
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+  
+  // invalid condition for division and square root
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0
+  //  - don't set flag if an input is NaN or Inf (IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);  
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - bit order, MSB first: Invalid, DivByZero, Overflow, Underflow, Inexact
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtintspecialcase.sv b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
new file mode 100644
index 000000000..f15f2d075
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
@@ -0,0 +1,15 @@
+module divremsqrtintspecialcase import cvw::*; #(parameter cvw_t P) (  // selects the final integer div/rem result, overriding it in two special cases
+    input logic BZeroM,RemOpM, ALTBM,               // divide-by-zero case, remainder (vs. quotient) requested, "numerator is small" case
+    input logic [P.XLEN-1:0] AM,                    // value returned for remainder ops in both special cases (presumably the dividend - confirm against caller)
+    input  signed [P.INTDIVb+3:0] PreIntResultM,    // result used when no special case applies; only its low XLEN bits are kept
+    output logic [P.XLEN-1:0] IntDivResultM         // selected integer result
+);
+always_comb                       // priority: BZeroM first, then ALTBM, then the normal result
+      if (BZeroM) begin         // Divide by zero
+        if (RemOpM) IntDivResultM = AM;                  // remainder op: return AM
+        else        IntDivResultM = {(P.XLEN){1'b1}};    // quotient op: return all ones
+     end else if (ALTBM) begin // Numerator is small
+        if (RemOpM) IntDivResultM = AM;                  // remainder op: return AM
+        else        IntDivResultM = 0;                   // quotient op: return zero
+     end else       IntDivResultM = PreIntResultM[P.XLEN-1:0]; // normal case: truncate to XLEN bits
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtlzc.sv b/src/fpu/divremsqrt/divremsqrtlzc.sv
new file mode 100644
index 000000000..1fa14405b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtlzc.sv
@@ -0,0 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Leading Zero Counter
+// 
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtlzc #(parameter WIDTH = 1) (  // counts zeros from num[WIDTH-1] down to the first 1
+  input  logic [WIDTH-1:0]            num,    // number to count the leading zeroes of
+  output logic [$clog2(WIDTH)-1:0]  ZeroCnt // the number of leading zeroes (low $clog2(WIDTH) bits of the count)
+);
+
+  integer i;                      // running count of zeros seen so far, starting from the MSB
+  
+  always_comb begin
+    i = 0;
+    while ((i < WIDTH) & ~num[WIDTH-1-i]) i = i+1;  // search for leading one; stops at i == WIDTH if none is found
+    ZeroCnt = i[$clog2(WIDTH)-1:0]; // NOTE(review): for all-zero num, i == WIDTH; that value does not fit when WIDTH is a power of 2 - confirm callers
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtnormshift.sv b/src/fpu/divremsqrt/divremsqrtnormshift.sv
new file mode 100644
index 000000000..4fc51b4ad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtnormshift.sv
@@ -0,0 +1,81 @@
+///////////////////////////////////////////
+// divremsqrtnormshift.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: normalization shifter
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+    // convert shift
+    //      fp -> int:  | `XLEN  zeros      |     Mantissa      | 0's if necessary | << CalcExp
+    //          process:
+    //              - start - CalcExp = 1 + XExp - Largest Bias
+    //                  | `XLEN  zeros      |     Mantissa      | 0's if necessary |
+    //
+    //              - shift left 1 (1)
+    //                  | `XLEN-1 zeros |bit|     frac          | 0's if necessary |
+    //                                      . <- binary point
+    //
+    //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+    //                  |  0's |     Mantissa      |      0's if necessary     |
+    //                  |     keep          |
+    //
+    //      fp -> fp:
+    //          - if result is subnormal or underflowed:
+    //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | << NF+CalcExp-1
+    //          process:
+    //             - start
+    //                 |     mantissa      | 0's |
+    //
+    //             - shift right by NF-1 (NF-1)
+    //                 |    `NF-1  zeros   |     mantissa      | 0's |
+    //
+    //             - shift left by CalcExp = XExp - Largest bias + new bias
+    //                 |   0's  |     mantissa      |     0's      |
+    //                 |       keep      |
+    //
+    //          - if the input is subnormal:
+    //                 |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+    //
+    //      int -> fp: |     lzcIn      | 0's if necessary | << ZeroCnt+1
+    //              - plus 1 to shift out the first 1
+
+    // fma shift
+    //      |   00   |           Sm           | << LZA output
+    //             .
+    //      - two extra bits so we can correct for an LZA error of 1 or 2
+
+    // divsqrt shift
+    //      | Nf 0's |           Qm           | << calculated shift amount
+    //        .
+
+module divremsqrtnormshift import cvw::*;  #(parameter cvw_t P) (  // left shifter used for normalization
+  input  logic [P.LOGNORMSHIFTSZDRSU-1:0]  ShiftAmt,   // shift amount
+  input  logic [P.NORMSHIFTSZDRSU-1:0]     ShiftIn,    // number to be shifted
+  output logic [P.NORMSHIFTSZDRSU-1:0]     Shifted     // shifted result (same width as ShiftIn)
+);
+   
+  assign Shifted = ShiftIn << ShiftAmt;  // logical left shift; zeros enter at the LSB and bits moved past the MSB are dropped
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..661e48c81
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,177 @@
+///////////////////////////////////////////
+// divremsqrtpostprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
+  // general signals
+  input logic                             Xs, Ys,     // input signs
+  input logic  [P.NF:0]                    Xm, Ym,     // input mantissas
+  input logic  [2:0]                      Frm,        // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+  input logic  [P.FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
+  input logic  [3:0]                      OpCtrl,     // operation control; only OpCtrl[0] (Sqrt) is read in this module
+  input logic                             XZero, YZero,        // inputs are zero
+  input logic                             XInf, YInf,          // inputs are infinity
+  input logic                             XNaN, YNaN,          // inputs are NaN
+  input logic                             XSNaN, YSNaN,        // inputs are signaling NaNs
+  input logic  [1:0]                      PostProcSel,         // select result to be written to fp register (2'b01 sets DivOp)
+  //fma signals (none are declared in this module)
+  //divide signals
+  input logic                             DivSticky,  // divider sticky bit
+  input logic  [P.NE+1:0]                  DivUe,      // divsqrt exponent
+  input logic  [P.NF+2:0]                  DivUm,      // divsqrt significand
+  input logic  [P.DIVBLEN-1:0]             IntNormShiftM, // integer normalization left-shift amount (after pre-shifting right); not referenced in this module body
+  input logic  [P.INTDIVb+3:0]          PreResultM, // integer result to be shifted; not referenced in this module body
+  input logic                              IntDivM,    // not referenced in this module body (presumably marks an integer divide - confirm)
+  // final results
+  output logic [P.FLEN-1:0]                PostProcRes,// postprocessor final result
+  output logic [4:0]                      PostProcFlg, // postprocessor flags
+  output logic [P.XLEN-1:0]  PreIntResultM // normalized integer result; NOTE(review): nothing in this module drives this output
+  );
+
+  
+  // general signals
+  logic                       Rs;         // result sign
+  logic [P.NF-1:0]             Rf;         // Result fraction
+  logic [P.NE-1:0]             Re;         // Result exponent
+  logic                       Ms;         // norMalized sign
+  logic [P.NORMSHIFTSZDRSU-1:0]    Mf;         // norMalized fraction
+  logic [P.NE+1:0]             Me;         // normalized exponent
+  logic [P.NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
+  logic                       UfPlus1;    // do you add one (for determining underflow flag)
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt;   // normalization shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn;    // input to normalization shift
+  logic [P.NORMSHIFTSZDRSU-1:0]    Shifted;    // the output of the normalized shifter (before shift correction)
+  logic                       Plus1;      // add one to the final result?
+  logic                       Overflow;   // overflow flag used to select results
+  logic                       Invalid;    // invalid flag used to select results
+  logic                       Guard, Round, Sticky; // bits needed to determine rounding
+  logic [P.FMTBITS-1:0]        OutFmt;     // output format
+  // division signals
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt;        // divsqrt shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]    DivShiftIn;         // divsqrt shift input
+  logic [P.NE+1:0]             Ue;                 // divsqrt corrected exponent after correction shift
+  logic                       DivByZero;          // divide by zero flag
+  logic                       DivResSubnorm;      // is the divsqrt result subnormal
+  logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+  // conversion signals (declared but not referenced by any active code below)
+  logic [P.CVTLEN+P.NF:0]       CvtShiftIn;         // number to be shifted for converter
+  logic [1:0]                 CvtNegResMsbs;      // most significant bits of possibly negated int result
+  logic [P.XLEN+1:0]           CvtNegRes;          // possibly negated integer result
+  logic                       CvtResUf;           // did the convert result underflow
+  logic                       IntInvalid;         // invalid integer flag
+  // readability signals (Mult, Int64, Signed, IntToFp and CvtOp are declared but not referenced by active code below)
+  logic                       Mult;       // multiply operation
+  logic                       Sqrt;       // is the divsqrt operation sqrt
+  logic                       Int64;      // is the integer 64 bits?
+  logic                       Signed;     // is the operation with a signed integer?
+  logic                       IntToFp;    // is the operation an int->fp conversion?
+  logic                       CvtOp;      // conversion operation
+  logic                       DivOp;      // divider operation
+  logic                       InfIn;      // are any of the inputs infinity
+  logic                       NaNIn;      // are any of the inputs NaN
+
+  // signals to help readability
+  
+  assign DivOp = (PostProcSel == 2'b01); // NOTE(review): DivOp is not read below; the submodules are given .DivOp(1'b1) instead
+  assign Sqrt =  OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf;
+  assign NaNIn = XNaN|YNaN;
+
+  // choose the output format depending on the operation
+  //      - in this module the output format is always Fmt (the conversion-dependent selection is commented out)
+  //      - NOTE(review): no assignment to OutFmt is generated when P.FPSIZES == 1
+  if (P.FPSIZES == 2) 
+      //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT); 
+      assign OutFmt = Fmt;
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4) 
+      //assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 
+      assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting
+
+  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  assign ShiftAmt = DivShiftAmt; // the divsqrt shift is the only shift source in this module
+  assign ShiftIn = DivShiftIn;
+  
+  // main normalization shift
+  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // shift correction: produces the normalized fraction Mf and the corrected exponent Ue
+  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+
+  // calculate result sign used in rounding unit
+  divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);
+
+  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
+      .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+
+  assign Rs = Ms; // the result sign is the normalized sign from roundsign
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
+              .Xs, .OutFmt, .Sqrt,
+              .NaNIn, .Round, .DivByZero,
+              .Guard, .Sticky, .UfPlus1,.DivOp(1'b1), .FullRe, .Plus1,
+              .Me, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero, 
+      .Frm, .OutFmt, .XNaN, .YNaN,  
+      .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+      .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..7a7e46964
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,267 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu, me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
+  input  logic [2:0]              Frm,                // rounding mode
+  input  logic                    Ms,                 // normalized sign
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Mf,                 // normalized fraction
+  // divsqrt
+  input  logic                    DivOp,              // is a division operation being done
+  input  logic                    DivSticky,          // divsqrt sticky bit
+  input  logic [P.NE+1:0]          Ue,                 // the divsqrt calculated exponent
+  // outputs
+  output logic [P.NE+1:0]          Me,                 // exponent fed to the rounding adder (copy of Ue)
+  output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
+  output logic [P.NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]          Re,                 // Result exponent
+  output logic [P.NF-1:0]          Rf,                 // Result fraction
+  output logic                    Sticky,             // sticky bit
+  output logic                    Plus1,              // do you add one to the final result
+  output logic                    Round, Guard        // bits needed to calculate rounding
+);
+
+  logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
+  logic           NormSticky;         // sticky bit gathered from the low bits of Mf
+  logic [P.NF-1:0] RoundFrac;          // top P.NF bits of Mf, before the rounding add
+  logic           FpGuard, FpRound;   // floating point round/guard bits
+  logic           FpLsbRes;           // least significant bit of floating point result
+  logic           LsbRes;             // lsb of result
+  logic           CalcPlus1;          // calculated plus1
+  logic           FpPlus1;            // do you add one to the fp result
+  logic [P.FLEN:0] RoundAdd;           // how much to add to the result
+
+// Overview (all statements below are combinational):
+//   1. OR together the bits of Mf below the Guard position into NormSticky
+//   2. pick Guard, Round and the result LSB out of Mf according to OutFmt
+//   3. decide from Frm whether to add one (Plus1) and build RoundAdd with
+//      that one placed at the LSB of the selected format
+//   4. add RoundAdd to {Me, RoundFrac} to produce FullRe and Rf
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even (Frm = 3'b000)
+  //      {Guard, Round|Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LsbRes = 1)
+  //      11 - Plus1
+  //      i.e. CalcPlus1 = Guard & (Round|Sticky|LsbRes)
+  //
+
+  //  round to zero (Frm = 3'b001) - never Plus1
+
+  //  round to -infinity (Frm = 3'b010)
+  //          - Plus1 if the result is negative (Ms = 1) and any of Guard, Round, Sticky is set
+  //          - exact results are left alone
+
+  //  round to infinity (Frm = 3'b011)
+  //          - Plus1 if the result is positive (Ms = 0) and any of Guard, Round, Sticky is set
+  //          - exact results are left alone
+
+  //  round to nearest max magnitude (Frm = 3'b100)
+  //      {Guard, Round|Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //      11 - Plus1
+  //      i.e. CalcPlus1 = Guard
+  //
+
+
+  // the generate-if chains below pick bit positions for the configured number of formats (P.FPSIZES)
+
+  // sticky bit calculation: OR of the Mf bits below the Guard bit (this includes the Round bit)
+  if (P.FPSIZES == 1) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 2) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+
+  end else if (P.FPSIZES == 3) begin
+
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 4) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.S_FMT))) | 
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.D_FMT))) |
+                                                (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]&(OutFmt==P.Q_FMT));
+  end
+  // NOTE(review): in the FPSIZES == 4 branch the lowest bits Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0] only reach NormSticky
+  //               when OutFmt == P.Q_FMT, while the other branches always include their lowest bits - confirm intent
+
+  // final sticky bit: the divider's sticky bit (only counted when DivOp is set)
+  //      OR'd with the sticky bits found in Mf
+  //assign Sticky = DivSticky&DivOp | NormSticky | StickySubnorm;
+  assign Sticky = DivSticky&DivOp | NormSticky;
+  //assign Sticky = DivSticky&DivOp;
+  
+
+
+
+  // determine guard, round and LSB bits for the selected format
+  //      - Guard sits just below the result LSB, Round sits just below Guard
+  if (P.FPSIZES == 1) begin
+      assign FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+      assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+      assign FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+
+  end else if (P.FPSIZES == 2) begin
+      assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF] : Mf[P.NORMSHIFTSZDRSU-P.NF1];
+      assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              P.FMT: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+              end
+              P.FMT1: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+              end
+              P.FMT2: begin
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
+              end
+              default: begin
+                  FpGuard = 1'bx;
+                  FpLsbRes = 1'bx;
+                  FpRound = 1'bx;
+              end
+          endcase
+  end else if (P.FPSIZES == 4) begin
+      always_comb
+          case (OutFmt)
+              2'h3: begin // bit positions derived from P.Q_NF
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
+              end
+              2'h1: begin // bit positions derived from P.D_NF
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
+              end
+              2'h0: begin // bit positions derived from P.S_NF
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
+              end
+              2'h2: begin // bit positions derived from P.H_NF
+                  FpGuard = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
+                  FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
+                  FpRound = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
+              end
+          endcase
+  end
+
+  // the floating-point selections are the only source of Guard, LsbRes and Round in this module
+  assign Guard =  FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round =  FpRound;
+
+
+  always_comb begin
+      // Determine if you add 1
+      case (Frm)
+          3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+          3'b001: CalcPlus1 = 0;//round to zero
+          3'b010: CalcPlus1 = Ms;//round down
+          3'b011: CalcPlus1 = ~Ms;//round up
+          3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+          default: CalcPlus1 = 1'bx;
+      endcase
+      // Determine if you add 1 (for underflow flag)
+      //      - same decisions, but with Round used where Guard is used above and Guard used as the LSB
+      case (Frm)
+          3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+          3'b001: UfCalcPlus1 = 0;//round to zero
+          3'b010: UfCalcPlus1 = Ms;//round down
+          3'b011: UfCalcPlus1 = ~Ms;//round up
+          3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+          default: UfCalcPlus1 = 1'bx;
+      endcase
+  end
+
+  // If an answer is exact don't round
+  assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format
+  if (P.FPSIZES == 1) begin
+      assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
+
+  end else if (P.FPSIZES == 2) begin
+      // \/FLEN+1
+      //  | NE+2 |        NF      |
+      //  '-NE+2-^----NF1----^
+      // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+      assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (P.FPSIZES == 3) begin
+      assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
+
+  end else if (P.FPSIZES == 4)      
+      assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction (keep the top P.NF bits of Mf)
+  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];
+  
+
+
+  // select the exponent (the divsqrt exponent is the only source here)
+  assign Me = Ue;
+
+
+
+  // round the result
+  //      - if the fraction overflows one should be added to the exponent
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[P.NE-1:0];
+
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..b0dd4270b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,45 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtroundsign import cvw::*;  #(parameter cvw_t P) (
+  input logic         Xs,     // sign of operand X
+  input logic         Ys,     // sign of operand Y
+  input logic         Sqrt,   // square root operation (Ys is ignored when set)
+  input logic         DivOp,  // divsqrt operation is selected
+  output logic        Ms      // sign handed to the rounding logic
+);
+
+  logic               DivSqrtSign;  // sign of the divide / square root result
+
+  // division: signs of X and Y are XORed; square root: the sign follows X alone
+  assign DivSqrtSign = Sqrt ? Xs : (Xs ^ Ys);
+
+  // the rounding sign is forced to 0 unless DivOp is set
+  assign Ms = DivOp ? DivSqrtSign : 1'b0;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..6b119ed6c
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,94 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtshiftcorrection import cvw::*;  #(parameter cvw_t P) (
+  input logic  [P.NORMSHIFTSZDRSU-1:0] Shifted,                // the shifted divsqrt result before the correction shift
+  // divsqrt
+  input logic                     DivOp,                  // is it a divsqrt operation (not referenced by any active statement below)
+  input logic                     DivResSubnorm,          // is the divsqrt result subnormal
+  input logic  [P.NE+1:0]          DivUe,                  // the divsqrt result's exponent
+  input logic                     DivSubnormShiftPos,     // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input logic                     FmaOp,                  // is it an fma opperation
+  //input logic  [P.NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic                     FmaPreResultSubnorm,    // is the result subnormal - calculated before LZA corection
+  //input logic                     FmaSZero,
+  // output
+  //output logic [P.NE+1:0]          FmaMe,                  // exponent of the normalized sum
+  output logic [P.NORMSHIFTSZDRSU-1:0] Mf,                     // the shifted result after the correction shift
+  output logic [P.NE+1:0]          Ue                      // corrected exponent for divider
+);
+
+  logic [P.NORMSHIFTSZDRSU-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
+  logic [P.NORMSHIFTSZDRSU-1:0]    CorrQmShifted;              // the shifted divsqrt result after the correction shift
+  logic                       ResSubnorm;                 // is the result Subnormal (declared, but only used in commented-out code)
+  logic                       LZAPlus1;                   // set when the msb of Shifted is 1
+  logic                       LeftShiftQm;                // should the divsqrt result be shifted one to the left
+
+  // the msb of the shifted result drives both the mantissa and the exponent correction
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // NOTE(review): the next three comment lines and the disabled mux presumably come from the FMA shift correction
+  //  - the only possible mantissa for a plus two is all zeroes 
+  //      - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+  //mux2 #(P.NORMSHIFTSZDRSU-2) lzacorrmux(Shifted[P.NORMSHIFTSZDRSU-3:0], Shifted[P.NORMSHIFTSZDRSU-2:1], LZAPlus1, CorrSumShifted);
+
+  // correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
+  //    condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
+  //assign LeftShiftQm = ((DivUe==1));
+  assign CorrQm0 = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}}; // Shifted moved left by two bits
+  assign CorrQm1 = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};  // Shifted moved left by one bit
+  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); // presumably CorrQm1 when LeftShiftQm is 1 - confirm mux2 port order
+  
+  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
+  always_comb
+    //if(FmaOp)                       Mf = {CorrSumShifted, {P.NORMSHIFTSZDRSU-(3*P.NF+4){1'b0}}};
+    //if (DivOp&~DivResSubnorm)  Mf = CorrQmShifted;
+    if (~DivResSubnorm)  Mf = CorrQmShifted;
+    else                       Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
+    
+  // (disabled) FMA exponent calculation, kept for reference:
+  //  main exponent issues: 
+  //      - LZA was one too large
+  //      - LZA was two too large
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  //                          if plus1                    If plus2                               kill if the result Zero or actually subnormal
+  //                          |                           |                                      |
+  //assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
+  
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZDRSU-2]&~Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // Ue is 0 for a subnormal result with a positive subnormal shift; otherwise DivUe, minus 1 when the msb of Shifted is 0
+  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+  //assign Ue = (DivResSubnorm ) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..d7f569add
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,240 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtspecialcase import cvw::*;  #(parameter cvw_t P) (
+  input  logic                Xs,         // X sign
+  input  logic [P.NF:0]        Xm, Ym, // input significands
+  input  logic                XNaN, YNaN, // are the inputs NaN
+  input  logic [2:0]          Frm,        // rounding mode
+  input  logic [P.FMTBITS-1:0] OutFmt,     // output format
+  input  logic                InfIn,      // are any inputs infinity
+  input  logic                NaNIn,      // are any input NaNs
+  input  logic                XInf, YInf, // are X or Y infinity
+  input  logic                XZero,      // is X zero
+  input  logic                Plus1,      // do you add one for rounding
+  input  logic                Rs,         // the result's sign
+  input  logic                Invalid, Overflow,  // flags to choose the result
+  input  logic [P.NE-1:0]      Re,         // Result exponent
+  input  logic [P.NE+1:0]      FullRe,     // Result full exponent
+  input  logic [P.NF-1:0]      Rf,         // Result fraction
+  // divsqrt
+  input  logic                DivOp,      // is it a divsqrt operation
+  input  logic                DivByZero,  // divide by zero flag
+  // outputs
+  output logic [P.FLEN-1:0]    PostProcRes // final result
+);
+
+  logic [P.FLEN-1:0]   XNaNRes;    // X is NaN result (only driven when P.IEEE754 is set)
+  logic [P.FLEN-1:0]   YNaNRes;    // Y is NaN result (only driven when P.IEEE754 is set)
+  logic [P.FLEN-1:0]   InvalidRes; // result for an invalid operation
+  logic [P.FLEN-1:0]   UfRes;      // underflowed result
+  logic [P.FLEN-1:0]   OfRes;      // overflowed result
+  logic [P.FLEN-1:0]   NormRes;    // normal result
+  logic               OfResMax;   // does the of result output maximum norm fp number
+  logic               KillRes;    // kill the result for underflow
+  logic               SelOfRes;   // should the overflow result be selected
+
+
+  // does the overflow result output the maximum normalized floating point number
+  //                output infinity if the input is infinity
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases
+  //      - for the narrower formats the upper bits of every candidate result are filled with ones
+  if (P.FPSIZES == 1) begin
+      //NaN res selection depending on standard
+      if(P.IEEE754) begin
+          assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+          assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+      end else begin
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+      end
+
+      assign OfRes =  OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = {Rs, Re, Rf};
+
+  end else if (P.FPSIZES == 2) begin
+      if(P.IEEE754) begin
+          assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+          assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+      end else begin 
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+      end
+
+      always_comb
+          if(OutFmt)
+              if(OfResMax)    OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+              else            OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          else
+              if(OfResMax)    OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+              else            OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+      assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+
+  end else if (P.FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              P.FMT: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              P.FMT1: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+                      YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+                  UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+              end
+              P.FMT2: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+                      YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+                  UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
+              end
+              default: begin
+                  if(P.IEEE754) begin
+                      XNaNRes = (P.FLEN)'(0);
+                      YNaNRes = (P.FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
+                  end else begin 
+                      InvalidRes = (P.FLEN)'(0);
+                  end
+                  OfRes = (P.FLEN)'(0);
+                  UfRes = (P.FLEN)'(0);
+                  NormRes = (P.FLEN)'(0);
+              end
+          endcase
+
+  end else if (P.FPSIZES == 4) begin 
+      always_comb
+          case (OutFmt)
+              2'h3: begin  // full-width result built from P.NE / P.NF
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              2'h1: begin  // result built from the P.D_* field widths
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+                      YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+                  UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
+              end
+              2'h0: begin  // result built from the P.S_* field widths
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+                      YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+                  UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
+              end
+              2'h2: begin  // result built from the P.H_* field widths
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+                      YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};      
+                // zero is exact if dividing by infinity so don't add 1 (same ~(DivOp&YInf) term is used for every format)
+                  UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
+              end
+          endcase
+  end
+
+  // determine if you should kill the result (select UfRes instead of NormRes)
+  //      - do so if the msb (sign bit) of the full exponent FullRe is set
+  //      - or, for a divsqrt operation, if X is zero or Y is infinity while X is not
+  //      - SelOfRes takes priority over KillRes in the selection below
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+  
+  // calculate if the overflow result should be selected
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+  
+  // final selection, highest priority first; NaN inputs are propagated only when P.IEEE754 is set
+  if(P.IEEE754)
+    always_comb
+      if(XNaN)                    PostProcRes = XNaNRes;
+      else if(YNaN)               PostProcRes = YNaNRes;
+      else if(Invalid)            PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid)           PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..2385cac20
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,102 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module drsu import cvw::*;  #(parameter cvw_t P) (
+  input  logic                clk, 
+  input  logic                reset, 
+  input  logic [P.FMTBITS-1:0] FmtE,                           // format select; also drives postprocess Fmt
+  input  logic                XsE, YsE,                       // operand signs
+  input  logic [P.NF:0]        XmE, YmE,                       // operand significands
+  input  logic [P.NE-1:0]      XeE, YeE,                       // operand exponents
+  input  logic                XInfE, YInfE, 
+  input  logic                XZeroE, YZeroE, 
+  input  logic                XNaNE, YNaNE, 
+  input  logic                XSNaNE, YSNaNE,                 // only consumed by the postprocessor
+  input  logic                FDivStartE, IDivStartE,
+  input  logic                StallM,
+  input  logic                FlushE,
+  input  logic                SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,
+  input  logic                IntDivE, W64E,
+  input  logic [2:0]          Frm,                            // only consumed by the postprocessor
+  input  logic [3:0]          OpCtrl,                         // only consumed by the postprocessor
+  input  logic [1:0]          PostProcSel,                    // only consumed by the postprocessor
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.FLEN-1:0]    FResM,                          // postprocess PostProcRes
+  output logic [P.XLEN-1:0]    FIntDivResultM,                 // connected to divremsqrt port FIntDivResultM
+  output logic [4:0]          FlgM                            // postprocess PostProcFlg
+);
+
+  // Floating-point division and square root, with optional integer division and remainder (X/Y, sqrt(X), A/B, A%B).
+  // This wrapper only instantiates divremsqrt and divremsqrtpostprocess and wires them together.
+
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       Firstun;                      // Quotient selection
+  logic                       WZeroE;                       // Early termination flag
+  logic [P.DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                       DivStartE;                    // Enable signal for flops during stall
+  // NOTE(review): none of the internal signals declared above (WS .. DivStartE) are referenced in this module
+  // Integer div/rem signals (through ISpecialCaseE, only IntDivM is referenced below)
+  logic                       BZeroM;                       // Denominator is zero
+  logic                       IntDivM;                      // Integer operation
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+  logic [P.DIVb:0]             UmM;                          // connected to divremsqrt port UmM; sliced into UmMexact below
+  logic [P.NF+2:0]             UmMexact; //U1.NF+2; top NF+3 bits of UmM, passed to postprocess as DivUm
+  logic [P.NE+1:0]             UeM;                          // connects divremsqrt port UeM to postprocess DivUe
+  logic                       DivStickyM;                   // connects divremsqrt port DivStickyM to postprocess DivSticky
+  logic [P.INTDIVb+3:0]          PreResultM;                 // shared by divremsqrt and postprocess (same-named ports)
+  logic [P.XLEN-1:0]          PreIntResultM;                 // shared by divremsqrt and postprocess (same-named ports)
+  logic [P.DIVBLEN-1:0]       IntNormShiftM;                 // shared by divremsqrt and postprocess (same-named ports)
+
+  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 
+            .XeE, .YeE, .SqrtE, .SqrtM,
+                    .XInfE, .YInfE, .XZeroE, .YZeroE, 
+            .XNaNE, .YNaNE, 
+                    .FDivStartE, .IDivStartE, .W64E,
+                    .StallM, .DivStickyM, .FDivBusyE, .UeM,
+                    .UmM,
+                    .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
+                    .Funct3E, .IntDivE, .FIntDivResultM, .IntDivM,
+                    .FDivDoneE, .IFDivStartE, .IntNormShiftM, .PreIntResultM, .PreResultM);
+  assign UmMexact = UmM[P.DIVb:P.DIVb-(P.NF+3-1)]; // grabbing top 1+(NF+2) msbs
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl, .IntDivM,
+    .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
+    .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
+    .PreIntResultM, .PreResultM, .IntNormShiftM); // NOTE(review): *E operand inputs are combined with *M results here - confirm the caller holds operands stable
+endmodule
+
diff --git a/src/fpu/divremsqrt/intrightshift.sv b/src/fpu/divremsqrt/intrightshift.sv
new file mode 100644
index 000000000..dd4f47aeb
--- /dev/null
+++ b/src/fpu/divremsqrt/intrightshift.sv
@@ -0,0 +1,37 @@
+///////////////////////////////////////////
+// intrightshift.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Logical right shift (>>) of an (INTDIVb+4)-bit value by a DIVBLEN-bit shift amount
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module intrightshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic signed [P.INTDIVb+3:0] shiftin,   // value to be shifted
+  input  logic        [P.DIVBLEN-1:0] shiftamt,  // number of bit positions to shift right
+  output logic signed [P.INTDIVb+3:0] shifted    // shiftin moved right by shiftamt
+);
+  // >> is the logical shift operator: vacated upper bits are zero-filled even though the operand is signed
+  always_comb shifted = shiftin >> shiftamt;
+endmodule
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
new file mode 100644
index 000000000..9ca2e5b61
--- /dev/null
+++ b/testbench/testbench-fp.sv
@@ -0,0 +1,1682 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com, james.stine@okstate.edu
+//
+// Purpose: Testbench for UCB Testfloat on Wally
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "config.vh"
+`include "tests-fp.vh"
+
+import cvw::*;
+
+module testbenchfp;
+   // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir
+   // to run specific precisions (e.g., quad or all)
+   parameter TEST="none";
+   parameter TEST_SIZE="none";
+
+  `include "parameter-defs.vh"   
+
+   //parameter MAXVECTORS = 8388610;
+   parameter MAXVECTORS = 100000;
+
+   // FIXME: needs cleaning of unused variables (jes)
+   string                       Tests[];                    // list of tests to be run
+   logic [3:0] 			OpCtrl[];                   // list of op controls
+   logic [2:0] 			Unit[];                     // list of units being tested
+   logic                        WriteInt[];                 // Is being written to integer resgiter
+   logic [2:0] 			Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
+   //logic [2:0] 			Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES
+   logic [1:0] 			Fmt[];                      // list of formats for the other units  
+
+   logic                        clk=0;
+   logic [31:0] 		TestNum=0;                  // index for the test
+   logic [31:0] 		OpCtrlNum=0;                // index for OpCtrl
+   logic [31:0] 		errors=0;                   // how many errors
+   logic [31:0] 		VectorNum=0;                // index for test vector
+   logic [31:0] 		FrmNum=0;                   // index for rounding mode
+   logic [P.Q_LEN*4+7:0] 	TestVectors[MAXVECTORS:0];     // list of test vectors
+
+   logic [1:0] 			FmtVal;                     // value of the current Fmt
+   logic [2:0] 			UnitVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal
+   logic [3:0]          OpCtrlVal;
+   logic                        WriteIntVal;                // value of the current WriteInt
+   logic [P.FLEN-1:0] 		X, Y, Z;                    // inputs read from TestFloat
+   logic [P.FLEN-1:0] 		XPostBox;                   // inputs read from TestFloat
+   logic [P.XLEN-1:0] 		SrcA, SrcB;                       // integer input
+   logic                  W64;                        // is W64 instruction
+   logic [P.FLEN-1:0] 		Ans;                        // correct answer from TestFloat
+   logic [P.FLEN-1:0] 		Res;                        // result from other units
+   logic [4:0] 			AnsFlg;                     // correct flags read from testfloat
+   logic [4:0] 			ResFlg, Flg;                // Result flags
+   logic [P.FMTBITS-1:0] 	ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
+   logic [P.FLEN-1:0] 		FpRes, FpCmpRes;            // Results from each unit
+   logic [P.XLEN-1:0] 		IntRes, CmpRes;             // Results from each unit
+   logic [4:0] 			FmaFlg, CvtFlg, DivFlg;     // Outputed flags
+   logic [4:0] 			CmpFlg;                     // Outputed flags
+   logic                        AnsNaN, ResNaN, NaNGood;
+   logic                        Xs, Ys, Zs;                 // sign of the inputs
+   logic [P.NE-1:0] 		Xe, Ye, Ze;                 // exponent of the inputs
+   logic [P.NF:0] 		Xm, Ym, Zm;                 // mantissas of the inputs
+   logic                        XNaN, YNaN, ZNaN;           // is the input NaN
+   logic                        XSNaN, YSNaN, ZSNaN;        // is the input a signaling NaN
+   logic                        XSubnorm, ZSubnorm;         // is the input denormalized
+   logic                        XInf, YInf, ZInf;           // is the input infinity
+   logic                        XZero, YZero, ZZero;        // is the input zero
+   logic                        XExpMax, YExpMax, ZExpMax;  // is the input's exponent all ones  
+   logic [P.CVTLEN-1:0] 	CvtLzcInE;                  // input to the Leading Zero Counter (priority encoder)
+   logic                        IntZero;
+   logic                        CvtResSgnE;
+   logic [P.NE:0] 		CvtCalcExpE;                // the calculated exponent
+   logic [P.LOGCVTLEN-1:0] 	CvtShiftAmtE;               // how much to shift by
+   logic [P.DIVb:0] 		Quot;
+   logic                        CvtResSubnormUfE;
+   logic                        DivStart=0;
+   logic 			FDivBusyE;
+   logic 			OldFDivBusyE;
+   logic                        reset = 1'b0;
+   logic [$clog2(P.NF+2)-1:0] 	XZeroCnt, YZeroCnt;
+
+   // in-between FMA signals
+   logic                        Mult;
+   logic                        Ss;
+   logic [P.NE+1:0] 		Pe;
+   logic [P.NE+1:0] 		Se;
+   logic 			ASticky;
+   logic 			KillProd; 
+   logic [$clog2(3*P.NF+5)-1:0] SCnt;
+   logic [3*P.NF+3:0] 		Sm;       
+   logic 			InvA;
+   logic 			NegSum;
+   logic 			As;
+   logic 			Ps;
+   logic                        DivSticky;
+   logic                        DivDone;
+   logic                        DivNegSticky;
+   logic [P.NE+1:0] 		DivCalcExp;
+   logic                        divsqrtop;
+
+   // Missing logic vectors fdivsqrt
+   logic [2:0] 			Funct3E;
+   logic [2:0] 			Funct3M;
+   logic 			FlushE;
+   logic 			IFDivStartE;
+   logic      IDivStart;
+   logic 			FDivDoneE;
+   logic [P.NE+1:0] 		UeM;
+   logic [P.DIVb:0] 		UmM;
+   logic [P.XLEN-1:0] 		FIntDivResultM;
+   logic 			ResMatch;                   // Check if result match
+   logic 			FlagMatch;                  // Check if IEEE flags match
+   logic 			CheckNow;                   // Final check
+   logic 			FMAop;                      // Is this a FMA operation?
+   logic      IntDivE;                    // Is Integer operation on FPU?
+
+   // FSM for testing each item per clock
+   typedef enum logic [2:0] {S0, Start, S2, Done} statetype;
+   statetype state, nextstate;   
+   
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     ||||||||| |||||||| ||||||| |||||||||   ||||||| |||||||| |||
+   //        |||    |||      |||        |||      |||     |||      |||
+   //        |||    |||||||| |||||||    |||      ||||||| |||||||| |||
+   //        |||    |||          |||    |||          ||| |||      |||
+   //        |||    |||||||| |||||||    |||      ||||||| |||||||| |||||||||
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // select tests relevant to the specified configuration
+   //    cvtint - test integer conversion unit (fcvtint)
+   //    cvtfp  - test floating-point conversion unit (fcvtfp)
+   //    cmp    - test comparison unit's LT, LE, EQ operations (fcmp)
+   //    add    - test addition
+   //    sub    - test subtraction
+   //    div    - test division
+   //    sqrt   - test square root
+   //    all    - test all of the above
+   flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M));
+
+   initial begin
+      // Information displayed for user on what is simulating
+      // $display("\nThe start of simulation...");      
+      $display("\nThe start of simulation... INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK);      
+      // $display("This simulation for TEST is %s", TEST);
+      if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
+         if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
+            // add the 128-bit cvtint tests to the to-be-tested list
+            Tests = {Tests, f128rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported add their conversions
+               Tests = {Tests, f128rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+            end
+         end 
+         // if the floating-point conversions are being tested          
+         if (TEST === "cvtfp" | TEST === "all") begin  
+            if (P.D_SUPPORTED) begin // if double precision is supported
+               // add the 128 <-> 64 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f64cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b01, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+            end
+            if (P.F_SUPPORTED) begin // if single precision is supported
+               // add the 128 <-> 32 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f32cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b00, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+            if (P.ZFH_SUPPORTED) begin // if half precision is supported
+               // add the 128 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f128f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b11};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b11};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
+            // add the compare tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the addition tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+            // add the subtraction tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the multiply tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the divide tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+         if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
+            Tests = {Tests, f128fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+      end
+      if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported
+         if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
+            Tests = {Tests, f64rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are being supported
+               Tests = {Tests, f64rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+            end
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
+            if (P.F_SUPPORTED) begin // if single precision is supported
+               // add the 64 <-> 32 bit conversions to the to-be-tested list
+               Tests = {Tests, f64f32cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b00, 3'b01};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+            if (P.ZFH_SUPPORTED) begin // if half precision is supported
+               // add the 64 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f64f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b01};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b01};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f64sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+         if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
+            Tests = {Tests, f64fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+      end
+      if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported
+         if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
+            Tests = {Tests, f32rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported
+               Tests = {Tests, f32rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+            end
+         end
+         if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
+            if (P.ZFH_SUPPORTED) begin 
+               // add the 32 <-> 16 bit conversions to the to-be-tested list
+               Tests = {Tests, f32f16cvt};
+               // add the op-ctrls (i.e. the format of the result)
+               OpCtrl = {OpCtrl, 3'b10, 3'b00};
+               WriteInt = {WriteInt, 1'b0, 1'b0};
+               // add the unit being tested and fmt (input format)
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b00};
+               end
+               for(int i = 0; i<5; i++) begin
+                  Unit = {Unit, `CVTFPUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin // if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f32sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+         if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
+            Tests = {Tests, f32fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+      end
+      if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported
+         if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested
+            Tests = {Tests, f16rv32cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+            if (P.XLEN == 64) begin // if 64-bit integers are supported
+               Tests = {Tests, f16rv64cvtint};
+               // add the op-codes for these tests to the op-code list
+               OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+               WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+               // add what unit is used and the fmt to their lists (one for each test)
+               for(int i = 0; i<20; i++) begin
+                  Unit = {Unit, `CVTINTUNIT};
+                  Fmt = {Fmt, 2'b10};
+               end
+            end
+         end
+         if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16cmp};
+            OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0};
+            for(int i = 0; i<15; i++) begin
+               Unit = {Unit, `CMPUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "add" | TEST === "all") begin //  if addition is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16add};
+            OpCtrl = {OpCtrl, `ADD_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16sub};
+            OpCtrl = {OpCtrl, `SUB_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16mul};
+            OpCtrl = {OpCtrl, `MUL_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "div" | TEST === "all") begin // if division is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16div};
+            OpCtrl = {OpCtrl, `DIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+         if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+            // add the correct tests/op-ctrls/unit/fmt to their lists
+            Tests = {Tests, f16sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `DIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end 
+         end
+         if (TEST === "fma" | TEST === "all") begin // if fma is being tested
+            Tests = {Tests, f16fma};
+            OpCtrl = {OpCtrl, `FMA_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `FMAUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+      end
+      if (P.IDIV_ON_FPU |1'b1) begin
+        if (P.Q_SUPPORTED) begin
+           if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f128div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b11};
+           end
+         end
+         if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f128sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+        end
+        if (P.D_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f64div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b01};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f64sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+        end
+        if (P.S_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f32div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b00};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f32sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+
+        end
+        if (P.ZFH_SUPPORTED) begin
+          if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested
+           // add the divide tests/op-ctrls/unit/fmt
+           Tests = {Tests, f16div};
+           OpCtrl = {OpCtrl, `DIV_OPCTRL};
+           WriteInt = {WriteInt, 1'b0};
+           for(int i = 0; i<5; i++) begin
+             Unit = {Unit, `INTDIVUNIT};
+             Fmt = {Fmt, 2'b10};
+           end
+          end
+          if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested
+            // add the square-root tests/op-ctrls/unit/fmt
+            Tests = {Tests, f16sqrt};
+            OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+            WriteInt = {WriteInt, 1'b0};
+            for(int i = 0; i<5; i++) begin
+               Unit = {Unit, `INTDIVUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+        end
+        if (P.XLEN == 64 & P.IDIV_ON_FPU) begin
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int64rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int64div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int64remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int64divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested
+            Tests = {Tests, int64remw};
+            OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested
+            Tests = {Tests, int64remuw};
+            OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested
+            Tests = {Tests, int64divw};
+            OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested
+            Tests = {Tests, int64divuw};
+            OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
+        // RV32 
+        else if (P.IDIV_ON_FPU) begin 
+         if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested
+            Tests = {Tests, int32rem};
+            OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested
+            Tests = {Tests, int32div};
+            OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested
+            Tests = {Tests, int32remu};
+            OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested
+            Tests = {Tests, int32divu};
+            OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
+            WriteInt = {WriteInt, 1'b1};
+            Unit = {Unit, `INTDIVUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+        end
+      end
+      // check if nothing is being tested
+
+      $display("This simulation for TEST contains %d vectors", Tests.size);      
+      if (Tests.size() == 0) begin
+         $display("TEST %s not supported in this configuration", TEST);
+         $stop;
+      end
+   end
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     ||||||||| |||||||| ||||||||| |||||||     ||||||||| |||||||| ||||||| |||||||||   
+   //     |||   ||| |||      |||   ||| ||   ||        |||    |||      |||        |||      
+   //     ||||||||  |||||||| ||||||||| ||   ||        |||    |||||||| |||||||    |||      
+   //     |||  ||   |||      |||   ||| ||   ||        |||    |||          |||    |||      
+   //     |||   ||| |||||||| |||   ||| |||||||        |||    |||||||| |||||||    |||      
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // Load the vector file of the first test into TestVectors.
+   initial begin
+      // The file name is assembled from `PATH and the test name through
+      // intermediate strings rather than one inline concatenation.
+      static string pp = `PATH;
+      string testname;
+      string tt0;
+      // start at test 0; the index has to be set before Tests[] is read below
+      TestNum = 0;
+      // $sformatf is the IEEE 1800 standard form of the vendor-specific $psprintf
+      tt0 = $sformatf("%s", Tests[TestNum]);
+      testname = {pp, tt0};
+      $display("\n\nRunning %s vectors ", Tests[TestNum]);
+      $readmemh(testname, TestVectors);
+   end
+
+   // set the signals for the test currently being run
+   always_comb UnitVal = Unit[TestNum];              // indexed by test number
+   always_comb FmtVal = Fmt[TestNum];                // indexed by test number
+   always_comb OpCtrlVal = OpCtrl[OpCtrlNum];        // indexed by operation number, not by test number
+   always_comb WriteIntVal = WriteInt[OpCtrlNum];    // indexed by operation number, not by test number
+   always_comb FrmVal = Frm[FrmNum];                 // indexed by rounding-mode number
+
+   // Adjust the format signal when only 2 precisions are supported (P.FMTBITS == 1):
+   //    - 1 for the larger precision (FmtVal equals P.FMT)
+   //    - 0 for the smaller precision
+   // In every other configuration FmtVal is passed through unchanged.
+   always_comb
+      ModFmt = (P.FMTBITS == 1) ? (FmtVal == P.FMT)
+                                : FmtVal;
+
+   // extract the inputs (X, Y, Z, SrcA) and the expected output (Ans, AnsFlg) from the current test vector
+   readvectors #(P) readvectors (
+      .clk, .TestNum, .VectorNum, .TestVector(TestVectors[VectorNum]),   // vector being decoded
+      .Unit(UnitVal), .OpCtrl(OpCtrlVal), .Fmt(FmtVal), .ModFmt,         // operation being tested
+      .Funct3E, .W64,
+      .Ans(Ans), .AnsFlg(AnsFlg),                                        // expected result and flags
+      .X, .Y, .Z, .XPostBox, .SrcA, .SrcB,                               // whole operands
+      .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, .Xm, .Ym, .Zm,                       // operand fields
+      .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,                       // operand classification
+      .XSubnorm, .ZSubnorm, .XZero, .YZero, .ZZero,
+      .XInf, .YInf, .ZInf, .XExpMax
+   );
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //     |||||||   |||   ||| ||||||||| 
+   //     |||   ||| |||   |||    |||    
+   //     |||   ||| |||   |||    |||    
+   //     |||   ||| |||   |||    |||         
+   //     |||||||   |||||||||    |||    
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // instantiate devices under test
+   // FMA unit: only instantiated for the fma, mul, add and sub tests and for "all"
+   if (TEST === "all" | TEST === "fma" | TEST === "mul" | TEST === "add" | TEST === "sub") begin : fma
+      fma #(P) fma(.OpCtrl(OpCtrlVal[2:0]),
+                   .Xs(Xs), .Xe(Xe), .Xm(Xm), .XZero,
+                   .Ys(Ys), .Ye(Ye), .Ym(Ym), .YZero,
+                   .Zs(Zs), .Ze(Ze), .Zm(Zm), .ZZero,
+                   .Ss, .Se, .Sm, .As, .Ps, .InvA, .SCnt, .ASticky);
+   end
+   
+   if (TEST === "all" | TEST === "cvtint" | TEST === "cvtfp") begin : fcvt   // conversion unit, for the cvtint/cvtfp tests
+      fcvt #(P) fcvt (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .ToInt(WriteIntVal),
+                      .Xs(Xs), .Xe(Xe), .Xm(Xm), .XZero(XZero), .Int(SrcA), .IntZero,
+                      .Ce(CvtCalcExpE), .Cs(CvtResSgnE), .ShiftAmt(CvtShiftAmtE),
+                      .LzcIn(CvtLzcInE), .ResSubnormUf(CvtResSubnormUfE));
+   end
+
+   if (TEST === "all" | TEST === "cmp") begin: fcmp   // comparison unit, for the cmp tests
+      fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .X, .Y,
+                   .Xs, .Xe, .Xm, .XZero, .XNaN, .XSNaN, .Ys, .Ye, .Ym, .YZero, .YNaN, .YSNaN,
+                   .CmpIntRes(CmpRes), .CmpFpRes(FpCmpRes), .CmpNV(CmpFlg[4]));
+   end
+   
+   if (TEST === "all" | TEST === "div" | TEST === "sqrt") begin: fdivsqrt
+      // floating-point divide/sqrt only: the integer-division inputs are tied to constants
+      fdivsqrt #(P) fdivsqrt(.clk, .reset, .FlushE(1'b0), .StallM(1'b0),
+                             .FmtE(ModFmt), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+                             .XsE(Xs), .XeE(Xe), .XmE(Xm), .YeE(Ye), .YmE(Ym),
+                             .XZeroE(XZero), .YZeroE(YZero), .XInfE(XInf), .YInfE(YInf),
+                             .XNaNE(XNaN), .YNaNE(YNaN),
+                             .IntDivE(1'b0), .IDivStartE(1'b0), .W64E(1'b0), .Funct3E(Funct3E), .Funct3M(Funct3M),
+                             .ForwardedSrcAE('0), .ForwardedSrcBE('0), .FIntDivResultM(FIntDivResultM),
+                             .FDivStartE(DivStart), .IFDivStartE(IFDivStartE), .FDivBusyE, .FDivDoneE(FDivDoneE),
+                             .UmM(Quot), .UeM(DivCalcExp), .DivStickyM(DivSticky));
+   end
+   if (TEST === "fdivremsqrt" | TEST === "div_drsu" | TEST === "sqrt_drsu" | TEST === "intdivrem" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" ) begin: divremsqrt // drsu tests: fp div/sqrt and integer div/rem
+    drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+      .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL),
+      .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]),
+      .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), 
+      .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
+      .StallM(1'b0), .FDivBusyE,
+      .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
+      .Funct3E(Funct3E), .IntDivE(IntDivE), 
+      .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg)); // FpRes/IntRes/Flg connect straight to drsu here (presumably its outputs)
+  end
+  else begin: postprocess // every other TEST value: FpRes/IntRes/Flg connect to the postprocess unit instead
+    postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal[2:0]), .DivUm(Quot), .DivUe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); // same FpRes/IntRes/Flg signals as the drsu branch
+  end
+
+   assign CmpFlg[3:0] = 4'b0000;   // only CmpFlg[4] is connected to fcmp (CmpNV); the low four bits are held at zero
+
+   // produce clock: starts high, toggles every 5 time units (period of 10)
+   always begin
+      clk = 1'b1; #5 clk = 1'b0; #5;
+   end
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   //          |||||      |||  ||||||||||  |||||      |||
+   //          |||||||    |||  |||    |||  |||||||    |||
+   //          |||| |||   |||  ||||||||||  |||| |||   |||
+   //          ||||  |||  |||  |||    |||  ||||  |||  |||
+   //          ||||   ||| |||  |||    |||  ||||   ||| |||
+   //          ||||    ||||||  |||    |||  ||||    ||||||
+
+   ///////////////////////////////////////////////////////////////////////////////////////////////
+
+   // Flag whether the expected answer (AnsNaN) and the result (ResNaN) are NaNs: exponent field all ones and fraction nonzero
+   always_comb begin
+      if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
+         // an integer output can't be a NaN
+         AnsNaN = 1'b0;
+         ResNaN = 1'b0;
+      end
+      else if (UnitVal === `CVTFPUNIT) begin
+         case (OpCtrlVal[1:0]) // for fp-to-fp conversions the format checked is selected by OpCtrl
+           2'b11: begin // quad (exponent/fraction boundary is P.Q_NF, as in every other quad check)
+              AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
+              ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
+           end
+           2'b01: begin // double
+              AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]);
+              ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]);
+           end
+           2'b00: begin // single
+              AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]);
+              ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]);
+           end
+           2'b10: begin // half
+              AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
+              ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
+           end
+         endcase
+      end
+      else begin
+         case (FmtVal) // every other unit: the format checked is the format of the test
+           2'b11: begin // quad
+              AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
+              ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
+           end
+           2'b01: begin // double
+              AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]);
+              ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]);
+           end
+           2'b00: begin // single
+              AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]);
+              ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]);
+           end
+           2'b10: begin // half
+              AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
+              ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
+           end
+         endcase
+      end
+   end
+   
+   always_comb begin
+      // select the result to check
+      case (UnitVal)
+        `FMAUNIT: Res = FpRes;
+        `DIVUNIT: Res = FpRes;
+        `CMPUNIT: Res = CmpRes;
+        `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
+        `CVTFPUNIT: Res = FpRes;
+        `INTDIVUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
+      endcase
+      // select the flag to check
+      case (UnitVal)
+        `FMAUNIT: ResFlg = Flg;
+        `DIVUNIT: ResFlg = Flg;
+        `CMPUNIT: ResFlg = CmpFlg;
+        `CVTINTUNIT: ResFlg = Flg;
+        `CVTFPUNIT: ResFlg = Flg;
+        `INTDIVUNIT: ResFlg = Flg;
+      endcase
+      // Use four state test sequence to handle div properly.
+      // Four states should allow other operations to finish
+      // properly and within time.
+      case (state)
+        S0: begin
+           DivStart = 1'b0; IDivStart = 1'b0; IntDivE = 1'b0; // idle: drive all three so none is left at X
+           nextstate = Start;
+        end
+        Start: begin
+           // floating-point divide or square root, on either the fp divider or the drsu
+           if (UnitVal == `DIVUNIT | (UnitVal == `INTDIVUNIT & (OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL))) begin
+             DivStart = 1'b1;
+             IDivStart = 1'b0;
+             IntDivE = 1'b0;
+           end
+           else if (UnitVal == `INTDIVUNIT) begin // any other op on the drsu: integer divide/remainder
+             DivStart = 1'b0;
+             IDivStart = 1'b1;
+             IntDivE = 1'b1;
+           end
+           else begin
+             DivStart = 1'b0; IDivStart = 1'b0; IntDivE = 1'b0; // not a divide: nothing to start
+           end
+           nextstate = S2;
+        end
+        S2: begin
+           // IntDivE is not assigned in this state, so it keeps the value chosen in Start
+           DivStart = 1'b0;
+           IDivStart = 1'b0;
+           if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT | UnitVal == `INTDIVUNIT))
+             nextstate = S2;
+           else
+             nextstate = Done;
+        end
+        Done: begin
+           DivStart = 1'b0;
+           IDivStart = 1'b0;
+           IntDivE = 1'b0;
+           nextstate = S0;
+        end
+      endcase // case (state)
+   end
+
+   // Provide reset for divsqrt to reset state: asserted at time 0 (after a #0 delay) and released 25 time units later
+   initial
+     begin
+        #0  reset = 1'b1;
+        #25 reset = 1'b0;     
+     end   
+
+   // Left-over from before - will remove soon
+   always @(posedge clk) 
+   OldFDivBusyE = FDivDoneE; // NOTE(review): samples FDivDoneE despite the "Busy" name, with a blocking assignment - confirm both are intended
+
+   // Sequential part of the test state machine. Different fp/int operations
+   // take different numbers of cycles, so the vector index only advances
+   // once the state machine has reached Done.
+   always @(posedge clk) begin
+      // move on to the next vector after the current one has completed;
+      // state still holds its pre-edge value here because it is only
+      // updated through the nonblocking assignments below
+      if (state == Done) begin
+         VectorNum = VectorNum + 1;
+      end
+
+      // state register, reset to S0 synchronously
+      if (reset) state <= S0;
+      else       state <= nextstate;
+
+   end
+
+   // check results on falling edge of clk
+   always @(negedge clk) begin
+      // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
+      //    - the sign of the NaN does not matter for the operations being tested
+      //    - when 2 or more NaNs are input, the NaN that is propagated doesn't matter
+      if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
+         case (FmtVal)
+            2'b11: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | 
+                              (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})) |
+                              (ZNaN&(Res[P.Q_LEN-2:0] === {Z[P.Q_LEN-2:P.Q_NF],1'b1,Z[P.Q_NF-2:0]})));
+            2'b01: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | 
+                              (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})) |
+                              (ZNaN&(Res[P.D_LEN-2:0] === {Z[P.D_LEN-2:P.D_NF],1'b1,Z[P.D_NF-2:0]})));
+            2'b00: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | 
+                              (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})) |
+                              (ZNaN&(Res[P.S_LEN-2:0] === {Z[P.S_LEN-2:P.S_NF],1'b1,Z[P.S_NF-2:0]})));
+            2'b10: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
+                              (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) |
+                              (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]})));
+         endcase
+      else if (UnitVal === `CVTFPUNIT) // if converting from FP to FP OpCtrl contains the final FP format
+         case (OpCtrlVal[1:0]) 
+            2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | 
+                              (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | 
+                              (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})));
+            2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.D_LEN-2:0] === Ans[P.D_LEN-2:0])) | 
+                              (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | 
+                              (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})));
+            2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.S_LEN-2:0] === Ans[P.S_LEN-2:0])) | 
+                              (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | 
+                              (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})));
+            2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) |
+                              (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | 
+                              (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
+                              (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})));
+         endcase
+      else NaNGood = 1'b0; // integers can't be NaNs
+
+         
+      ///////////////////////////////////////////////////////////////////////////////////////////////
+
+      //     ||||||| |||    ||| ||||||| ||||||| |||   |||
+      //     |||     |||    ||| |||     |||     |||  |||
+      //     |||     |||||||||| ||||||| |||     ||||||
+      //     |||     |||    ||| |||     |||     |||  |||
+      //     ||||||| |||    ||| ||||||| ||||||| |||    |||
+
+      ///////////////////////////////////////////////////////////////////////////////////////////////
+
+      // check if result is correct
+      assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
+      assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal ==`INTDIVU_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTREMW_OPCTRL) | (OpCtrlVal == `INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) ; 
+      assign FMAop = (OpCtrlVal == `FMAUNIT);  
+      assign DivDone = OldFDivBusyE & ~FDivBusyE;
+      //assign DivDone =  ~FDivBusyE;
+      //assign DivDone =  FDivDoneE;
+      assign CheckNow = ((DivDone | ~divsqrtop) | 
+                         (TEST == "add" | TEST == "fma" | TEST == "sub") |
+                         ((TEST == "all") & (DivDone | ~divsqrtop)));
+            
+      if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin
+         errors += 1;
+         $display("\nError in %s", Tests[TestNum]);
+         $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
+         $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
+         $stop;
+      end
+      
+      if (TestVectors[VectorNum][100:0] === 101'bx & Tests[TestNum] !== "" ) begin // if reached the eof
+         // increment the test
+         TestNum += 1;
+         // clear the vectors
+         for(int i=0; i<MAXVECTORS; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}};
+         // read next files
+         $readmemh({`PATH, Tests[TestNum]}, TestVectors);
+         // set the vector index back to 0
+         VectorNum = 0;
+         // increment the operation if all the rounding modes have been tested
+         if (FrmNum === 4 | WriteIntVal == 1'b1) OpCtrlNum += 1;
+         // increment the rounding mode or loop back to rne 
+         if (FrmNum < 4) FrmNum += 1;
+         else begin
+            FrmNum = 0;
+            // Add some time as a buffer between tests at the end of each test
+            // (to be removed)
+            repeat (10)
+              @(posedge clk);
+         end
+         
+         $display("Running %s vectors", Tests[TestNum]);
+      end
+      // if no more Tests - finish
+      if (Tests[TestNum] === "") begin
+              $display("\nAll Tests completed with %d errors\n", errors);
+              $stop;
+      end 
+   end
+endmodule
+
+// readvectors: on each rising clk edge, slice TestVector into operands (X/Y/Z or SrcA/SrcB) and expected Ans/AnsFlg for the selected Unit/Fmt/OpCtrl; X/Y/Z are then fed to unpack.
+module readvectors import cvw::*; #(parameter cvw_t P) (
+                    input logic                 clk,
+                    input logic [P.Q_LEN*4+7:0] TestVector, // current vector; field positions depend on Unit/Fmt/OpCtrl
+                    input logic [P.FMTBITS-1:0] ModFmt,     // format code forwarded to unpack
+                    input logic [1:0]           Fmt,        // selects slice widths: 11 quad, 01 double, 00 single, 10 half
+                    input logic [2:0]           Unit,       // unit under test, selects the outer case below
+                    input logic [31:0]          VectorNum,  // not referenced inside this module
+                    input logic [31:0]          TestNum,    // not referenced inside this module
+                    input logic [3:0]           OpCtrl,
+                    output logic [P.FLEN-1:0]   Ans,        // expected result taken from the vector
+                    output logic [P.XLEN-1:0]   SrcA,       // integer operand (int divide / int->fp convert)
+                    output logic [P.XLEN-1:0]   SrcB,       // second integer operand (int divide only)
+                    output logic [4:0]          AnsFlg,     // expected flags, TestVector[4:0] (x for int divide)
+                    output logic                Xs, Ys, Zs, // sign bits of XYZ
+                    output logic [P.NE-1:0]     Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
+                    output logic [P.NF:0]       Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
+                    output logic                XNaN, YNaN, ZNaN, // is XYZ a NaN
+                    output logic                XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+                    output logic                XSubnorm, ZSubnorm, // is X/Z subnormal
+                    output logic                XZero, YZero, ZZero, // is XYZ zero
+                    output logic                XInf, YInf, ZInf, // is XYZ infinity
+                    output logic                XExpMax,
+                    output logic [2:0]          Funct3E,    // set only in the integer divide/remainder branch
+                    output logic                W64,        // set only in the integer divide/remainder branch
+                    output logic [P.FLEN-1:0]   X, Y, Z, XPostBox
+                    );
+
+   localparam Q_LEN = 32'd128; // NOTE(review): not referenced in this module; all slices below use P.Q_LEN
+   logic                                        XEn;
+   logic                                        YEn;
+   logic                                        ZEn;
+   logic                                        FPUActive;
+
+   // apply test vectors on rising edge of clk
+   // Format of vectors: Inputs(1/2/3)_Ans_Flg; narrower formats are padded with upper 1s up to FLEN
+   always @(posedge clk) begin
+      AnsFlg = TestVector[4:0];
+      case (Unit)
+        `FMAUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = TestVector[8+4*(P.Q_LEN)-1:8+3*(P.Q_LEN)];
+                  Y = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  Z = TestVector[8+2*(P.Q_LEN)-1:8+P.Q_LEN];
+               end
+               else begin
+                  X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                  if (OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)]; else Y = {2'b0, {P.Q_NE-1{1'b1}}, (P.Q_NF)'(0)}; // else Y = 1.0
+                  if (OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+               end
+               Ans = TestVector[8+(P.Q_LEN-1):8];
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+4*(P.D_LEN)-1:8+3*(P.D_LEN)]};
+                  Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                  Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+P.D_LEN]};
+               end
+               else begin
+                  X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]}; // was missing; quad/single/half branches all load X here
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]}; 
+                  else Y = {{P.FLEN-P.D_LEN{1'b1}}, 2'b0, {P.D_NE-1{1'b1}}, (P.D_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.D_LEN{1'b1}}, {P.D_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+               end
+               Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+4*(P.S_LEN)-1:8+3*(P.S_LEN)]};
+                  Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+P.S_LEN]};
+               end
+               else begin
+                  X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]}; 
+                  else Y = {{P.FLEN-P.S_LEN{1'b1}}, 2'b0, {P.S_NE-1{1'b1}}, (P.S_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.S_LEN{1'b1}}, {P.S_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+(P.S_LEN)]};
+               end
+               Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+            end
+            2'b10: begin // half
+               if (OpCtrl === `FMA_OPCTRL) begin
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+4*(P.H_LEN)-1:8+3*(P.H_LEN)]};
+                  Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+P.H_LEN]};
+               end
+               else begin
+                  X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                  if (OpCtrl === `MUL_OPCTRL) Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]}; 
+                  else Y = {{P.FLEN-P.H_LEN{1'b1}}, 2'b0, {P.H_NE-1{1'b1}}, (P.H_NF)'(0)};
+                  if (OpCtrl === `MUL_OPCTRL) Z = {{P.FLEN-P.H_LEN{1'b1}}, {P.H_LEN{1'b0}}}; 
+                  else Z = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+               end
+               Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+            end
+          endcase
+        `DIVUNIT:
+          if (OpCtrl[0]) // single-operand vectors: only X and Ans are loaded (YEn is low for this case)
+            case (Fmt)
+              2'b11: begin // quad
+                 X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
+              end
+              2'b01: if (P.D_SUPPORTED) begin // double
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+              end
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              end
+              2'b10: begin // half
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+              end
+            endcase
+          else
+            case (Fmt)
+              2'b11: begin // quad
+                 X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                 Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                 Ans = TestVector[8+(P.Q_LEN-1):8];
+              end
+              2'b01: if (P.D_SUPPORTED) begin // double
+                 X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                 Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                 Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+              end
+              2'b00: if (P.S_SUPPORTED) begin // single
+                 X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                 Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                 Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+              end
+              2'b10: begin // half
+                 X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                 Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                 Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+              end
+            endcase
+         `INTDIVUNIT: begin
+            if (!(OpCtrl === `DIV_OPCTRL | OpCtrl === `SQRT_OPCTRL)) begin
+               SrcA = TestVector[2*(P.Q_LEN)+P.XLEN-1+12:2*(P.Q_LEN)+12]; 
+               SrcB = TestVector[(P.Q_LEN)+P.XLEN-1+12:P.Q_LEN+12];
+               Ans = TestVector[P.XLEN-1+12:12];
+               // no flag checking for intdiv test cases
+               AnsFlg = 5'bx;
+               case (OpCtrl)
+               `INTDIV_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b0;
+               end
+               `INTREM_OPCTRL: begin
+                  Funct3E = 3'b110;
+                  W64 = 1'b0;
+               end
+               `INTREMU_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b0;
+               end
+               `INTDIVU_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b0;
+               end
+               `INTDIVW_OPCTRL: begin
+                  Funct3E = 3'b100;
+                  W64 = 1'b1;
+               end
+               `INTDIVUW_OPCTRL: begin
+                  Funct3E = 3'b101;
+                  W64 = 1'b1;
+               end
+               `INTREMW_OPCTRL: begin
+                  Funct3E = 3'b110;
+                  W64 = 1'b1;
+               end
+               `INTREMUW_OPCTRL: begin
+                  Funct3E = 3'b111;
+                  W64 = 1'b1;
+               end
+               default: begin
+                  Funct3E = 3'b000;
+                  W64 = 1'b0;
+               end
+               endcase
+            end
+            // testing div/sqrt on drsu
+            else begin
+               if (OpCtrl[0])
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+               else
+                  case (Fmt)
+                  2'b11: begin // quad
+                     X = TestVector[8+3*(P.Q_LEN)-1:8+2*(P.Q_LEN)];
+                     Y = TestVector[8+2*(P.Q_LEN)-1:8+(P.Q_LEN)];
+                     Ans = TestVector[8+(P.Q_LEN-1):8];
+                  end
+                  2'b01: if (P.D_SUPPORTED) begin // double
+                     X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+3*(P.D_LEN)-1:8+2*(P.D_LEN)]};
+                     Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+2*(P.D_LEN)-1:8+(P.D_LEN)]};
+                     Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                  end
+                  2'b00: if (P.S_SUPPORTED) begin // single
+                     X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+3*(P.S_LEN)-1:8+2*(P.S_LEN)]};
+                     Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+2*(P.S_LEN)-1:8+1*(P.S_LEN)]};
+                     Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                  end
+                  2'b10: begin // half
+                     X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+3*(P.H_LEN)-1:8+2*(P.H_LEN)]};
+                     Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+2*(P.H_LEN)-1:8+(P.H_LEN)]};
+                     Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                  end
+                  endcase
+            end
+         end
+        `CMPUNIT:
+          case (Fmt)        
+            2'b11: begin // quad
+               X = TestVector[12+2*(P.Q_LEN)-1:12+(P.Q_LEN)];
+               Y = TestVector[12+(P.Q_LEN)-1:12];
+               Ans = TestVector[8];
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+2*(P.D_LEN)-1:12+(P.D_LEN)]};
+               Y = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[12+(P.D_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+2*(P.S_LEN)-1:12+(P.S_LEN)]};
+               Y = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[12+(P.S_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+            2'b10: begin // half
+               X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+2*(P.H_LEN)-1:12+(P.H_LEN)]};
+               Y = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[12+(P.H_LEN)-1:12]};
+               Ans = TestVector[8];
+            end
+          endcase
+        `CVTFPUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {TestVector[8+P.Q_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01: if (P.D_SUPPORTED) begin // double
+                    X = {TestVector[8+P.Q_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00: begin // single
+                    X = {TestVector[8+P.Q_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10: begin // half
+                    X = {TestVector[8+P.Q_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01: begin // double
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00: begin // single
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10: begin // half
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.D_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01: if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00: begin // single
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10: begin // half
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.S_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+            2'b10: begin // half
+               case (OpCtrl[1:0])
+                 2'b11: begin // quad
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.Q_LEN-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01: if (P.D_SUPPORTED) begin // double
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.D_LEN-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b00: if (P.S_SUPPORTED) begin // single
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.S_LEN-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10: begin // half
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.H_LEN+P.H_LEN-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+               endcase
+            end
+          endcase        
+        `CVTINTUNIT:
+          case (Fmt)
+            2'b11: begin // quad
+               // {is the operation int -> fp, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> quad
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)];
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b10: begin // int -> quad
+                    // extend the 32-bit integer to XLEN by replicating its top bit
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]};
+                    Ans = TestVector[8+(P.Q_LEN-1):8];
+                 end
+                 2'b01: begin // quad -> long
+                    X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00: begin // quad -> int
+                    X = {TestVector[8+32+P.Q_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b01: if (P.D_SUPPORTED) begin // double
+               // {is the operation int -> fp, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> double
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)];
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b10: begin // int -> double
+                    // extend the 32-bit integer to XLEN by replicating its top bit
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]};
+                    Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]};
+                 end
+                 2'b01: begin // double -> long
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00: begin // double -> int
+                    X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b00: if (P.S_SUPPORTED) begin // single
+               // {is the operation int -> fp, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> single
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)];
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b10: begin // int -> single
+                    // extend the 32-bit integer to XLEN by replicating its top bit
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]};
+                    Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]};
+                 end
+                 2'b01: begin // single -> long
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00: begin // single -> int
+                    X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+            2'b10: begin // half
+               // {is the operation int -> fp, is the integer a long}
+               casex ({OpCtrl[2:1]})
+                 2'b11: begin // long -> half
+                    X = {P.FLEN{1'bx}};
+                    SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)];
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b10: begin // int -> half
+                    // extend the 32-bit integer to XLEN by replicating its top bit
+                    X = {P.FLEN{1'bx}};
+                    SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]};
+                    Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
+                 end
+                 2'b01: begin // half -> long
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {TestVector[8+(P.XLEN-1):8]};
+                 end
+                 2'b00: begin // half -> int
+                    X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]};
+                    SrcA = {P.XLEN{1'bx}};
+                    Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]};
+                 end
+               endcase
+            end
+          endcase
+      endcase  
+   end
+
+   assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); // X is x-filled above for int -> fp conversions
+   assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL));
+   assign ZEn = (Unit == `FMAUNIT); // Z is only loaded in the FMAUNIT branch
+   assign FPUActive = 1'b1;
+   
+   unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
+                      .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
+                      .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
+                      .XEn, .YEn, .ZEn, .XExpMax, .XPostBox);
+
+endmodule
diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
index 707b2d5f4..7fe5897fb 100755
--- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh
+++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
@@ -1,5 +1,7 @@
 #!/bin/sh
 # create test vectors for stand alone int
 
+mkdir -p IF_vectors  # -p: no error when the directory already exists, so the script can be rerun
 ./extract_testfloat_vectors.py
 ./extract_arch_vectors.py
+cp IF_vectors/*  ../vectors  # copy everything in IF_vectors into ../vectors