diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh
index dc6c9bb00..f11b71c0a 100644
--- a/pipelined/config/buildroot/wally-config.vh
+++ b/pipelined/config/buildroot/wally-config.vh
@@ -124,8 +124,6 @@
 `define PLIC_NUM_SRC 53
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh
index 15b2e08e7..7d083f3b5 100644
--- a/pipelined/config/rv32e/wally-config.vh
+++ b/pipelined/config/rv32e/wally-config.vh
@@ -130,8 +130,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
 `define BPRED_ENABLED 0
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh
index 3522fd1e6..70124d551 100644
--- a/pipelined/config/rv32gc/wally-config.vh
+++ b/pipelined/config/rv32gc/wally-config.vh
@@ -128,8 +128,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh
index 80d167a3d..d44072d6a 100644
--- a/pipelined/config/rv32i/wally-config.vh
+++ b/pipelined/config/rv32i/wally-config.vh
@@ -130,8 +130,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh
index 13b2eb747..e42fd3100 100644
--- a/pipelined/config/rv32ic/wally-config.vh
+++ b/pipelined/config/rv32ic/wally-config.vh
@@ -128,8 +128,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh
index 82f8446bb..3bc745eb1 100644
--- a/pipelined/config/rv64BP/wally-config.vh
+++ b/pipelined/config/rv64BP/wally-config.vh
@@ -130,8 +130,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
 `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL"  or BPLOCALPAg or BPGSHARE
diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index bcc791338..cc8d1b2b8 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -132,8 +132,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh
index 08e8006ce..0dee000e2 100644
--- a/pipelined/config/rv64fpquad/wally-config.vh
+++ b/pipelined/config/rv64fpquad/wally-config.vh
@@ -131,8 +131,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh
index 042364aca..9afa1a679 100644
--- a/pipelined/config/rv64gc/wally-config.vh
+++ b/pipelined/config/rv64gc/wally-config.vh
@@ -131,8 +131,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh
index 402c3b364..67ca51a7a 100644
--- a/pipelined/config/rv64i/wally-config.vh
+++ b/pipelined/config/rv64i/wally-config.vh
@@ -131,8 +131,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh
index 491759359..fca1f2609 100644
--- a/pipelined/config/rv64ic/wally-config.vh
+++ b/pipelined/config/rv64ic/wally-config.vh
@@ -131,8 +131,6 @@
 `define PLIC_GPIO_ID 3
 `define PLIC_UART_ID 10
 
-`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
 `define BPRED_ENABLED 1
 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
 `define TESTSBP 0
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index afe822f46..3c2699da0 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -94,11 +94,12 @@
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
 
 // largest length in IEU/FPU
-`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
 `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
-`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
-`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
-`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
+`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
+`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
+`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
 
 // Disable spurious Verilator warnings
 
diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile
index 0a9e7d993..5ad721722 100644
--- a/pipelined/regression/Makefile
+++ b/pipelined/regression/Makefile
@@ -1,30 +1,9 @@
-make allclean:
-	make clean
-	make all
-
-make clean:
-	make clean -C ../../tests/riscof
-	make clean -C ../../tests/wally-riscv-arch-test
-#	make allclean -C ../../tests/imperas-riscv-tests
-
-make all:
+all: archtests wallytests memfiles
 	# *** Build old tests/imperas-riscv-tests for now;
 	# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
 	# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
 	#make -C ../../tests/imperas-riscv-tests --jobs
 	#make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs
-	
-	# Build riscv-arch-test 64 and 32-bit versions
-	make -C ../../tests/riscof/ --jobs
-	make -C ../../tests/riscof/ XLEN=32 --jobs
-
-	# Build wally-riscv-arch-test
-	make -C ../../tests/wally-riscv-arch-test/ --jobs
-	make -C ../../tests/wally-riscv-arch-test/ XLEN=32  --jobs
-
-# build the memfiles and address files.
-	make -f makefile-memfile wally-sim-files --jobs
-
 	# Only compile Imperas tests if they are installed locally.  
 	# They are usually a symlink to $RISCV/imperas-riscv-tests and only 
 	# get compiled there manually during installation
@@ -36,4 +15,32 @@ make all:
 	# Link Linux test vectors (fix this later***)
 	#cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh
 
-	
+# These targets are commands, not files this makefile creates.
+.PHONY: all allclean clean archtests wallytests memfiles
+
+# Prerequisites are only guaranteed to run left to right in a serial build.
+# allclean needs clean to finish before all starts, and memfiles scans the test
+# directories for ELF files that the test builds must already have produced,
+# so keep this makefile serial even under "make -j".  The sub-makes below are
+# handed --jobs explicitly and still build in parallel.
+.NOTPARALLEL:
+
+allclean: clean all
+
+clean:
+	make clean -C ../../tests/riscof
+	make clean -C ../../tests/wally-riscv-arch-test
+#	make allclean -C ../../tests/imperas-riscv-tests
+
+archtests:
+	# Build riscv-arch-test 64 and 32-bit versions
+	make -C ../../tests/riscof/ --jobs
+	make -C ../../tests/riscof/ XLEN=32 --jobs
+
+wallytests:
+	# Build wally-riscv-arch-test
+	make -C ../../tests/wally-riscv-arch-test/ --jobs
+	make -C ../../tests/wally-riscv-arch-test/ XLEN=32  --jobs
+
+memfiles:
+	make -f makefile-memfile wally-sim-files --jobs
diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally
index 2b5288d51..750486c4e 100755
--- a/pipelined/regression/lint-wally
+++ b/pipelined/regression/lint-wally
@@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
 verilator=`which verilator`
 
 basepath=$(dirname $0)/..
-for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do
+for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do
     echo "$config linting..."
     if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
         echo "Exiting after $config lint due to errors or warnings"
diff --git a/pipelined/regression/makefile-memfile b/pipelined/regression/makefile-memfile
index 892e6db9b..c41963864 100644
--- a/pipelined/regression/makefile-memfile
+++ b/pipelined/regression/makefile-memfile
@@ -8,8 +8,9 @@ IMPERASDIR	:= $(ROOT)/tests/imperas-riscv-tests
 ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) 
 
 ELFFILES	?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
+OBJDUMPFILES	?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf\.objdump")
 MEMFILES ?= $(ELFFILES:.elf=.elf.memfile)
-ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr)
+ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr)
 
 print:
 	echo "files in $(ALLDIRS) are $(ELFFILES)."
diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally
index 664f99648..07058241d 100755
--- a/pipelined/regression/regression-wally
+++ b/pipelined/regression/regression-wally
@@ -71,7 +71,7 @@ for test in tests64gc:
         grepstr="All tests ran without failures")
   configs.append(tc)
 
-tests32gc = ["arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a",  "imperas32c", "wally32priv"]  #, "imperas32mmu""wally32i", 
+tests32gc = ["arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a",  "imperas32c", "wally32priv", "wally32periph"]  #, "imperas32mmu""wally32i", 
 for test in tests32gc:
   tc = TestCase(
         name=test,
diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do
index 68c240c8a..db6948699 100644
--- a/pipelined/regression/testfloat.do
+++ b/pipelined/regression/testfloat.do
@@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv  ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 
 
 vsim -voptargs=+acc work.testbenchfp -G TEST=$2
 
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 61b35a51b..a58400cca 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -7,3 +7,22 @@ add wave -noupdate /testbenchfp/Y
 add wave -noupdate /testbenchfp/Z
 add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
+add wave -noupdate /testbenchfp/DivStart
+add wave -noupdate /testbenchfp/DivDone
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
+add wave -group {Testbench} -noupdate /testbenchfp/*
+add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv
index 899dffb77..ab054342f 100644
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@@ -7,10 +7,10 @@ module cvtshiftcalc(
     input logic  [`NE:0]           CvtCalcExpM,    // the calculated expoent
     input logic  [`NF:0]           XManM,          // input mantissas
     input logic     [`FMTBITS-1:0]  OutFmt,       // output format
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
     input logic CvtResDenormUfM,
     output logic CvtResUf,
-    output logic [`LGLEN+`NF:0]    CvtShiftIn    // number to be shifted
+    output logic [`CVTLEN+`NF:0]    CvtShiftIn    // number to be shifted
 );
     logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
 
@@ -31,8 +31,8 @@ module cvtshiftcalc(
     //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | 
     //          - otherwise:
     //              |     LzcInM      | 0's if nessisary | 
-    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
-                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : 
                                    {CvtLzcInM, {`NF+1{1'b0}}};
     
     
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
new file mode 100644
index 000000000..57022e5ae
--- /dev/null
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -0,0 +1,15 @@
+`include "wally-config.vh"
+// divshiftcalc: from the divider quotient, produce the normalization shift amount and the exponent corrected for that shift
+module divshiftcalc(
+    input logic  [`DIVLEN+2:0] Quot, // quotient; bit `DIVLEN+2 is its most significant bit
+    input logic  [`NE:0] DivCalcExpM, // calculated divide exponent, before correction
+    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, // shift amount: 1 when Quot[`DIVLEN+2] is 0, otherwise 0
+    output logic [`NE:0] CorrDivExp // DivCalcExpM, reduced by 1 when Quot[`DIVLEN+2] is 0
+);
+    // only the lowest bit of the shift amount can be set; all upper bits are tied to zero
+    assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
+    // the quotient is in the range [.5,2)
+    // if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift
+    assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
+    // NOTE(review): no denormal check is visible here; the subtraction depends only on Quot[`DIVLEN+2] - confirm denormals are handled elsewhere
+endmodule
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index a76122804..26ca7dd83 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -12,11 +12,11 @@ module fcvt (
     input logic             XDenormE,   // is the input denormalized
     input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
     output logic [`NE:0]           CvtCalcExpE,    // the calculated expoent
-	output logic [`LOGLGLEN-1:0] CvtShiftAmtE,  // how much to shift by
+	output logic [`LOGCVTLEN-1:0] CvtShiftAmtE,  // how much to shift by
     output logic                   CvtResDenormUfE,// does the result underflow or is denormalized
     output logic                   CvtResSgnE,     // the result's sign
     output logic                   IntZeroE,      // is the integer zero?
-    output logic [`LGLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
+    output logic [`CVTLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
     );
 
     // OpCtrls:
@@ -43,7 +43,7 @@ module fcvt (
     logic                   Int64;      // is the integer 64 bits?
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
-    logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
+    logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC
 
 
     // seperate OpCtrl for code readability
@@ -78,10 +78,10 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
-                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
+    assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+                             {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
     
-    lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
+    lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
 
     ///////////////////////////////////////////////////////////////////////////
     // shifter
@@ -99,9 +99,9 @@ module fcvt (
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
-                    CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : 
-                              (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
+    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} :
+                    CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : 
+                              (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}};
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -180,7 +180,7 @@ module fcvt (
     //                  - shift left to normilize (-1-ZeroCnt)
     //                  - newBias to make the biased exponent
     //          
-    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
+    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})};
     // find if the result is dnormal or underflows
     //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
     //      - can't underflow an integer to Fp conversion
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index be73e9e7a..ff83079a8 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -82,7 +82,7 @@ module fpu (
 
    // unpacking signals
    logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM;                       // input's sign - memory stage
+   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
    logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
    logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
    logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
@@ -104,23 +104,27 @@ module fpu (
    logic 		  FOpCtrlQ;   
 
    // Fma Signals
-    logic [3*`NF+5:0]	SumE, SumM;                       
-    logic [`NE+1:0]	    ProdExpE, ProdExpM;
-    logic 			    AddendStickyE, AddendStickyM;
-    logic 			    KillProdE, KillProdM;
-    logic 			    InvZE, InvZM;
-    logic 			    NegSumE, NegSumM;
-    logic 			    ZSgnEffE, ZSgnEffM;
-    logic 			    PSgnE, PSgnM;
-    logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
+   logic [3*`NF+5:0]	SumE, SumM;                       
+   logic [`NE+1:0]	    ProdExpE, ProdExpM;
+   logic 			    AddendStickyE, AddendStickyM;
+   logic 			    KillProdE, KillProdM;
+   logic 			    InvZE, InvZM;
+   logic 			    NegSumE, NegSumM;
+   logic 			    ZSgnEffE, ZSgnEffM;
+   logic 			    PSgnE, PSgnM;
+   logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
 
    // Cvt Signals
-    logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
-	 logic [`LOGLGLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
-    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
-    logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
-    logic                   IntZeroE, IntZeroM;      // is the integer zero?
-    logic [`LGLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+   logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
+   logic [`LOGCVTLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+   logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
+   logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
+   logic                   IntZeroE, IntZeroM;      // is the integer zero?
+   logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+   
+   //divide signals
+   logic [`DIVLEN+2:0] Quot;
+   logic [`NE:0] DivCalcExpM;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -317,7 +321,7 @@ module fpu (
 
    // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
    flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
+   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
    flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
    flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
    flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
@@ -333,7 +337,7 @@ module fpu (
    flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
                            {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
                            {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
-   flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+   flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                            {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                            {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
 
@@ -351,9 +355,9 @@ module fpu (
 
    assign FpLoadM = FResSelM[1];
 
-   postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
-                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, 
-                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
+                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
                            .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
                            .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, 
                            .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 267647346..d970fdbce 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -30,7 +30,7 @@
 `include "wally-config.vh"
 
 module postprocess(
-    input logic                             XSgnM,  // input signs
+    input logic                             XSgnM, YSgnM,  // input signs
     input logic     [`NE-1:0]               ZExpM, // input exponents
     input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
     input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
@@ -51,13 +51,15 @@ module postprocess(
     input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
     input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
+    input logic [`NE:0]           DivCalcExpM,    // the calculated expoent
     input logic CvtResDenormUfM,
-	input logic [`LOGLGLEN-1:0] CvtShiftAmtM,  // how much to shift by
+	input logic [`LOGCVTLEN-1:0] CvtShiftAmtM,  // how much to shift by
     input logic                   CvtResSgnM,     // the result's sign
     input logic             FWriteIntM,     // is fp->int (since it's writting to the integer register)
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
     input logic             IntZeroM,         // is the input zero
     input logic [1:0] PostProcSelM, // select result to be written to fp register
+    input logic [`DIVLEN+2:0]   Quot,
     output logic    [`FLEN-1:0]    PostProcResM,    // FMA final result
     output logic    [4:0]          PostProcFlgM,
     output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
@@ -75,13 +77,14 @@ module postprocess(
     logic [3*`NF+8:0]            FmaShiftIn;        // is the sum zero
     logic               UfPlus1;                    // do you add one (for determining underflow flag)
     logic               Round;   // bits needed to determine rounding
-    logic [`LGLEN+`NF:0]    CvtShiftIn;    // number to be shifted
+    logic [`CVTLEN+`NF:0]    CvtShiftIn;    // number to be shifted
     logic               Mult;       // multiply opperation
     logic [`FLEN:0]     RoundAdd;       // how much to add to the result
     logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
     logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt;   // normalization shift count
     logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
+    logic [$clog2(`NORMSHIFTSZ)-1:0]  DivShiftAmt;
     logic [`NORMSHIFTSZ-1:0]            ShiftIn;        // is the sum zero
     logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
     logic                   Plus1;      // add one to the final result?
@@ -91,6 +94,7 @@ module postprocess(
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
     logic [`NE+1:0] RoundExp;
+    logic [`NE:0] CorrDivExp;
     logic [1:0] NegResMSBS;
     logic CvtOp;
     logic FmaOp;
@@ -135,6 +139,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
 
     always_comb
         case(PostProcSelM)
@@ -143,12 +148,12 @@ module postprocess(
                 ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
             end
             2'b00: begin // cvt
-                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
-                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM};
+                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
             end
-            2'b01: begin //div
-                ShiftAmt = 0;//{DivShiftAmt};
-                ShiftIn =  0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
+            2'b01: begin //div ***prob can take out
+                ShiftAmt = DivShiftAmt;
+                ShiftIn =  {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
             end
             default: begin 
                 ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
@@ -171,9 +176,9 @@ module postprocess(
     // round to infinity
     // round to nearest max magnitude
 
-    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
+    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
                 .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
-                .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
+                .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
@@ -181,7 +186,7 @@ module postprocess(
 
     resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
                           .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, 
-                          .CvtResSgnM, .RoundSgn, .ResSgn);
+                          .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index c8862ff94..9a76cf8f3 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -4,6 +4,8 @@ module resultsign(
     input logic [2:0]   FrmM,
     input logic         PSgnM, ZSgnEffM,
     input logic         InvZM,
+    input logic         XSgnM,
+    input logic         YSgnM,
     input logic         ZInfM,
     input logic         InfIn,
     input logic         NegSumM,
@@ -25,6 +27,7 @@ module resultsign(
     logic FmaResSgn;
     logic FmaResSgnTmp;
     logic Underflow;
+    logic DivSgn;
     // logic ResultSgnTmp;
 
     // Determine the sign if the sum is zero
@@ -43,9 +46,10 @@ module resultsign(
     assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
     assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
 
-    // Sign for rounding calulation
-    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
+    assign DivSgn = XSgnM^YSgnM;
 
-    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
+    // Sign for rounding calculation
+    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
+    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 92f1d4c27..73395caed 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -11,6 +11,7 @@ module round(
     input logic  [`FMTBITS-1:0] OutFmt,       // precision 1 = double 0 = single
     input logic  [2:0]          FrmM,       // rounding mode
     input logic                 FmaOp,
+    input logic                 DivOp,
     input logic [1:0] PostProcSelM,
     input logic                 CvtResDenormUfM,
     input logic                 ToInt,
@@ -23,6 +24,7 @@ module round(
     input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
     input logic                 RoundSgn,      // the result's sign
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
+    input logic [`NE:0]           CorrDivExp,    // the calculated expoent
     output logic                UfPlus1,  // do you add or subtract on from the result
     output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
     output logic [`NF-1:0]      ResFrac,         // Result fraction
@@ -303,7 +305,7 @@ module round(
         case(PostProcSelM)
             2'b10: RoundExp = SumExp; // fma
             2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
-            2'b01: RoundExp = 0; // divide
+            2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
             default: RoundExp = 0; 
         endcase
 
diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv
index d0d3c40a8..51fe54214 100644
--- a/pipelined/src/generic/flop/bram1p1rw.sv
+++ b/pipelined/src/generic/flop/bram1p1rw.sv
@@ -54,10 +54,6 @@ module bram1p1rw
   logic [DATA_WIDTH-1:0] 			 RAM [(2**ADDR_WIDTH)-1:0];
   integer 							 i;
 
-  initial begin
-	$readmemh("big64.txt", RAM);
-  end
-
   always @ (posedge clk) begin
 	dout <= RAM[addr];    
 	if(we) begin
diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv
index 47acfddc2..4781360e5 100644
--- a/pipelined/src/uncore/clint.sv
+++ b/pipelined/src/uncore/clint.sv
@@ -60,7 +60,7 @@ module clint (
   flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd);
 
   assign HRESPCLINT = 0; // OK
-  assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses 
+  assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses 
   
   // word aligned reads
   if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000};
@@ -87,8 +87,7 @@ module clint (
     always_ff @(posedge HCLK or negedge HRESETn) 
       if (~HRESETn) begin
         MSIP <= 0;
-        MTIMECMP <= 0;
-        // MTIMECMP is not reset
+        MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts
       end else if (memwrite) begin
         if (entryd == 16'h0000) MSIP <= HWDATA[0];
         if (entryd == 16'h4000) begin
@@ -104,7 +103,6 @@ module clint (
     always_ff @(posedge HCLK or negedge HRESETn) 
       if (~HRESETn) begin
         MTIME <= 0;
-        // MTIMECMP is not reset
       end else if (memwrite & entryd == 16'hBFF8) begin
         // MTIME Counter.  Eventually change this to run off separate clock.  Synchronization then needed
         for(j=0;j<`XLEN/8;j++)
diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile
index 63146339c..49b21be7a 100644
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@@ -1,4 +1,4 @@
-all: exptestgen testgen qslc_r4a2 qslc_r4a2b
+all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
 
 sqrttestgen: sqrttestgen.c
 	gcc sqrttestgen.c -o sqrttestgen -lm
@@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c
 	gcc qslc_r4a2b.c -o qslc_r4a2b -lm
 	./qslc_r4a2b > qslc_r4a2b.tv
 
+qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
+	gcc $< -o $@ -lm
+	./$@ > $@.sv
+
 clean:
-	rm -f testgen exptestgen qslc_r4a2 
+	rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
diff --git a/pipelined/srt/qsel4.dat b/pipelined/srt/qsel4.dat
new file mode 100644
index 000000000..b92d81e8e
--- /dev/null
+++ b/pipelined/srt/qsel4.dat
@@ -0,0 +1,1024 @@
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/pipelined/srt/qsel4.sv b/pipelined/srt/qsel4.sv
index 069f4268c..70b8b92d2 100644
--- a/pipelined/srt/qsel4.sv
+++ b/pipelined/srt/qsel4.sv
@@ -11,7 +11,7 @@ module qsel4 (
 	logic [2:0] Dmsbs;
 	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
 	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
+        assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
 	// D = 0001.xxx...
 	// Dmsbs = |   |
     // W =      xxxx.xxx...
diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2
new file mode 100755
index 000000000..5cff70cdf
Binary files /dev/null and b/pipelined/srt/qslc_sqrt_r4a2 differ
diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c
new file mode 100644
index 000000000..252293cc0
--- /dev/null
+++ b/pipelined/srt/qslc_sqrt_r4a2.c
@@ -0,0 +1,198 @@
+/*
+  Program:      qslc_sqrt_r4a2.c
+  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
+  User:         James E. Stine
+
+*/
+
+#include <stdio.h>
+#include <math.h>
+
+#define DIVISOR_SIZE 3
+#define CARRY_SIZE 7
+#define SUM_SIZE 7
+#define TOT_SIZE 7
+
+void disp_binary(double, int, int);
+
+struct bits {                           /* one table index: a (divisor, residual) estimate pair */
+  unsigned int divisor : DIVISOR_SIZE;  /* divisor estimate; main() switches on it to pick a threshold set */
+  int tot : TOT_SIZE;                   /* residual estimate compared against the thresholds; assumed to be a signed field - TODO confirm per compiler */
+} pla;                                  /* single global instance, stepped through every value by main() */
+
+/*
+
+   Function:      disp_binary
+   Description:   Prints x to stdout as a fixed-point binary string of
+   '0'/'1' characters with no radix point; a negative x has
+   2^bits_to_left added first, i.e. it is printed in two's complement
+   Argument List: double x            The value to be converted
+   int bits_to_left    Number of bits left of radix point
+   int bits_to_right   Number of bits right of radix point
+   Return value:  none
+
+*/
+void disp_binary(double x, int bits_to_left, int bits_to_right) {
+  /* Emit x as bits_to_left + bits_to_right binary digits, MSB first,   */
+  /* with no radix point.  Nothing else (no newline) is printed.        */
+  int pos;          /* digit position; the digit's weight is 2^-pos     */
+  double weight;    /* value of the digit currently being emitted       */
+  int is_tiny = fabs(x) < pow(2.0, (double) -bits_to_right);
+
+  /* Magnitude below one LSB: the whole field is zeros. */
+  if (is_tiny) {
+    for (pos = 1 - bits_to_left; pos <= bits_to_right; pos++)
+      putchar('0');
+    return;
+  }
+
+  /* Negative values wrap modulo 2^bits_to_left (two's complement). */
+  if (x < 0.0)
+    x += pow(2.0, (double) bits_to_left);
+
+  /* Peel off one digit per position by comparing against its weight. */
+  pos = 1 - bits_to_left;
+  while (pos <= bits_to_right) {
+    weight = pow(2.0, (double) -pos);
+    if (x < weight) {
+      putchar('0');
+    } else {
+      putchar('1');
+      x -= weight;
+    }
+    pos++;
+  }
+}
+
+int main() {
+  int d;            /* outer counter: one pass per divisor estimate          */
+  int w;            /* inner counter: one pass per residual estimate         */
+  const char *sel;  /* digit code selected for the entry being printed       */
+  pla.tot = 0;
+  pla.divisor = 0;
+  fputs("\tcase({D[5:3],Wmsbs})\n", stdout);
+  for (d = 0; d < (1 << DIVISOR_SIZE); d++) {
+    for (w = 0; w < (1 << TOT_SIZE); w++) {
+      fputs("\t\t11'b", stdout);
+      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
+      putchar('_');
+      disp_binary((double) pla.tot, TOT_SIZE, 0);
+      fputs(": q = 4'b", stdout);
+
+      /*
+        Digit codes written to q (radix 4, a = 2):
+          1000 = +2,  0100 = +1,  0000 = 0,  0010 = -1,  0001 = -2
+        Each case holds the residual thresholds for one divisor
+        estimate, tested from the most negative band upward.  pla.tot
+        is a TOT_SIZE-bit field and is expected to wrap to negative
+        values half-way through the inner loop (TODO confirm off gcc).
+      */
+      switch (pla.divisor) {
+      case 0:
+        if (pla.tot < -26)
+          sel = "0001";
+        else if (pla.tot < -8)
+          sel = "0010";
+        else if (pla.tot < 8)
+          sel = "0000";
+        else if (pla.tot < 24)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 1:
+        if (pla.tot < -28)
+          sel = "0001";
+        else if (pla.tot < -10)
+          sel = "0010";
+        else if (pla.tot < 8)
+          sel = "0000";
+        else if (pla.tot < 28)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 2:
+        if (pla.tot < -32)
+          sel = "0001";
+        else if (pla.tot < -12)
+          sel = "0010";
+        else if (pla.tot < 8)
+          sel = "0000";
+        else if (pla.tot < 32)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 3:
+        if (pla.tot < -34)
+          sel = "0001";
+        else if (pla.tot < -12)
+          sel = "0010";
+        else if (pla.tot < 8)
+          sel = "0000";
+        else if (pla.tot < 32)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 4:
+        if (pla.tot < -36)
+          sel = "0001";
+        else if (pla.tot < -12)
+          sel = "0010";
+        else if (pla.tot < 12)
+          sel = "0000";
+        else if (pla.tot < 36)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 5:
+        if (pla.tot < -40)
+          sel = "0001";
+        else if (pla.tot < -16)
+          sel = "0010";
+        else if (pla.tot < 12)
+          sel = "0000";
+        else if (pla.tot < 40)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 6:
+        if (pla.tot < -44)
+          sel = "0001";
+        else if (pla.tot < -16)
+          sel = "0010";
+        else if (pla.tot < 16)
+          sel = "0000";
+        else if (pla.tot < 40)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      case 7:
+        if (pla.tot < -46)
+          sel = "0001";
+        else if (pla.tot < -16)
+          sel = "0010";
+        else if (pla.tot < 16)
+          sel = "0000";
+        else if (pla.tot < 44)
+          sel = "0100";
+        else
+          sel = "1000";
+        break;
+      default:
+        sel = "XXX";
+      }
+
+      printf("%s;\n", sel);
+      pla.tot++;
+    }
+    pla.divisor++;
+  }
+  fputs("\tendcase\n", stdout);
+  return 0;
+}
diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv
new file mode 100644
index 000000000..805dbbaeb
--- /dev/null
+++ b/pipelined/srt/qslc_sqrt_r4a2.sv
@@ -0,0 +1,1026 @@
+	case({D[5:3],Wmsbs})
+		11'b000_0000000: q = 4'b0000;
+		11'b000_0000001: q = 4'b0000;
+		11'b000_0000010: q = 4'b0000;
+		11'b000_0000011: q = 4'b0000;
+		11'b000_0000100: q = 4'b0000;
+		11'b000_0000101: q = 4'b0000;
+		11'b000_0000110: q = 4'b0000;
+		11'b000_0000111: q = 4'b0000;
+		11'b000_0001000: q = 4'b0100;
+		11'b000_0001001: q = 4'b0100;
+		11'b000_0001010: q = 4'b0100;
+		11'b000_0001011: q = 4'b0100;
+		11'b000_0001100: q = 4'b0100;
+		11'b000_0001101: q = 4'b0100;
+		11'b000_0001110: q = 4'b0100;
+		11'b000_0001111: q = 4'b0100;
+		11'b000_0010000: q = 4'b0100;
+		11'b000_0010001: q = 4'b0100;
+		11'b000_0010010: q = 4'b0100;
+		11'b000_0010011: q = 4'b0100;
+		11'b000_0010100: q = 4'b0100;
+		11'b000_0010101: q = 4'b0100;
+		11'b000_0010110: q = 4'b0100;
+		11'b000_0010111: q = 4'b0100;
+		11'b000_0011000: q = 4'b1000;
+		11'b000_0011001: q = 4'b1000;
+		11'b000_0011010: q = 4'b1000;
+		11'b000_0011011: q = 4'b1000;
+		11'b000_0011100: q = 4'b1000;
+		11'b000_0011101: q = 4'b1000;
+		11'b000_0011110: q = 4'b1000;
+		11'b000_0011111: q = 4'b1000;
+		11'b000_0100000: q = 4'b1000;
+		11'b000_0100001: q = 4'b1000;
+		11'b000_0100010: q = 4'b1000;
+		11'b000_0100011: q = 4'b1000;
+		11'b000_0100100: q = 4'b1000;
+		11'b000_0100101: q = 4'b1000;
+		11'b000_0100110: q = 4'b1000;
+		11'b000_0100111: q = 4'b1000;
+		11'b000_0101000: q = 4'b1000;
+		11'b000_0101001: q = 4'b1000;
+		11'b000_0101010: q = 4'b1000;
+		11'b000_0101011: q = 4'b1000;
+		11'b000_0101100: q = 4'b1000;
+		11'b000_0101101: q = 4'b1000;
+		11'b000_0101110: q = 4'b1000;
+		11'b000_0101111: q = 4'b1000;
+		11'b000_0110000: q = 4'b1000;
+		11'b000_0110001: q = 4'b1000;
+		11'b000_0110010: q = 4'b1000;
+		11'b000_0110011: q = 4'b1000;
+		11'b000_0110100: q = 4'b1000;
+		11'b000_0110101: q = 4'b1000;
+		11'b000_0110110: q = 4'b1000;
+		11'b000_0110111: q = 4'b1000;
+		11'b000_0111000: q = 4'b1000;
+		11'b000_0111001: q = 4'b1000;
+		11'b000_0111010: q = 4'b1000;
+		11'b000_0111011: q = 4'b1000;
+		11'b000_0111100: q = 4'b1000;
+		11'b000_0111101: q = 4'b1000;
+		11'b000_0111110: q = 4'b1000;
+		11'b000_0111111: q = 4'b1000;
+		11'b000_1000000: q = 4'b0001;
+		11'b000_1000001: q = 4'b0001;
+		11'b000_1000010: q = 4'b0001;
+		11'b000_1000011: q = 4'b0001;
+		11'b000_1000100: q = 4'b0001;
+		11'b000_1000101: q = 4'b0001;
+		11'b000_1000110: q = 4'b0001;
+		11'b000_1000111: q = 4'b0001;
+		11'b000_1001000: q = 4'b0001;
+		11'b000_1001001: q = 4'b0001;
+		11'b000_1001010: q = 4'b0001;
+		11'b000_1001011: q = 4'b0001;
+		11'b000_1001100: q = 4'b0001;
+		11'b000_1001101: q = 4'b0001;
+		11'b000_1001110: q = 4'b0001;
+		11'b000_1001111: q = 4'b0001;
+		11'b000_1010000: q = 4'b0001;
+		11'b000_1010001: q = 4'b0001;
+		11'b000_1010010: q = 4'b0001;
+		11'b000_1010011: q = 4'b0001;
+		11'b000_1010100: q = 4'b0001;
+		11'b000_1010101: q = 4'b0001;
+		11'b000_1010110: q = 4'b0001;
+		11'b000_1010111: q = 4'b0001;
+		11'b000_1011000: q = 4'b0001;
+		11'b000_1011001: q = 4'b0001;
+		11'b000_1011010: q = 4'b0001;
+		11'b000_1011011: q = 4'b0001;
+		11'b000_1011100: q = 4'b0001;
+		11'b000_1011101: q = 4'b0001;
+		11'b000_1011110: q = 4'b0001;
+		11'b000_1011111: q = 4'b0001;
+		11'b000_1100000: q = 4'b0001;
+		11'b000_1100001: q = 4'b0001;
+		11'b000_1100010: q = 4'b0001;
+		11'b000_1100011: q = 4'b0001;
+		11'b000_1100100: q = 4'b0001;
+		11'b000_1100101: q = 4'b0001;
+		11'b000_1100110: q = 4'b0010;
+		11'b000_1100111: q = 4'b0010;
+		11'b000_1101000: q = 4'b0010;
+		11'b000_1101001: q = 4'b0010;
+		11'b000_1101010: q = 4'b0010;
+		11'b000_1101011: q = 4'b0010;
+		11'b000_1101100: q = 4'b0010;
+		11'b000_1101101: q = 4'b0010;
+		11'b000_1101110: q = 4'b0010;
+		11'b000_1101111: q = 4'b0010;
+		11'b000_1110000: q = 4'b0010;
+		11'b000_1110001: q = 4'b0010;
+		11'b000_1110010: q = 4'b0010;
+		11'b000_1110011: q = 4'b0010;
+		11'b000_1110100: q = 4'b0010;
+		11'b000_1110101: q = 4'b0010;
+		11'b000_1110110: q = 4'b0010;
+		11'b000_1110111: q = 4'b0010;
+		11'b000_1111000: q = 4'b0000;
+		11'b000_1111001: q = 4'b0000;
+		11'b000_1111010: q = 4'b0000;
+		11'b000_1111011: q = 4'b0000;
+		11'b000_1111100: q = 4'b0000;
+		11'b000_1111101: q = 4'b0000;
+		11'b000_1111110: q = 4'b0000;
+		11'b000_1111111: q = 4'b0000;
+		11'b001_0000000: q = 4'b0000;
+		11'b001_0000001: q = 4'b0000;
+		11'b001_0000010: q = 4'b0000;
+		11'b001_0000011: q = 4'b0000;
+		11'b001_0000100: q = 4'b0000;
+		11'b001_0000101: q = 4'b0000;
+		11'b001_0000110: q = 4'b0000;
+		11'b001_0000111: q = 4'b0000;
+		11'b001_0001000: q = 4'b0100;
+		11'b001_0001001: q = 4'b0100;
+		11'b001_0001010: q = 4'b0100;
+		11'b001_0001011: q = 4'b0100;
+		11'b001_0001100: q = 4'b0100;
+		11'b001_0001101: q = 4'b0100;
+		11'b001_0001110: q = 4'b0100;
+		11'b001_0001111: q = 4'b0100;
+		11'b001_0010000: q = 4'b0100;
+		11'b001_0010001: q = 4'b0100;
+		11'b001_0010010: q = 4'b0100;
+		11'b001_0010011: q = 4'b0100;
+		11'b001_0010100: q = 4'b0100;
+		11'b001_0010101: q = 4'b0100;
+		11'b001_0010110: q = 4'b0100;
+		11'b001_0010111: q = 4'b0100;
+		11'b001_0011000: q = 4'b0100;
+		11'b001_0011001: q = 4'b0100;
+		11'b001_0011010: q = 4'b0100;
+		11'b001_0011011: q = 4'b0100;
+		11'b001_0011100: q = 4'b1000;
+		11'b001_0011101: q = 4'b1000;
+		11'b001_0011110: q = 4'b1000;
+		11'b001_0011111: q = 4'b1000;
+		11'b001_0100000: q = 4'b1000;
+		11'b001_0100001: q = 4'b1000;
+		11'b001_0100010: q = 4'b1000;
+		11'b001_0100011: q = 4'b1000;
+		11'b001_0100100: q = 4'b1000;
+		11'b001_0100101: q = 4'b1000;
+		11'b001_0100110: q = 4'b1000;
+		11'b001_0100111: q = 4'b1000;
+		11'b001_0101000: q = 4'b1000;
+		11'b001_0101001: q = 4'b1000;
+		11'b001_0101010: q = 4'b1000;
+		11'b001_0101011: q = 4'b1000;
+		11'b001_0101100: q = 4'b1000;
+		11'b001_0101101: q = 4'b1000;
+		11'b001_0101110: q = 4'b1000;
+		11'b001_0101111: q = 4'b1000;
+		11'b001_0110000: q = 4'b1000;
+		11'b001_0110001: q = 4'b1000;
+		11'b001_0110010: q = 4'b1000;
+		11'b001_0110011: q = 4'b1000;
+		11'b001_0110100: q = 4'b1000;
+		11'b001_0110101: q = 4'b1000;
+		11'b001_0110110: q = 4'b1000;
+		11'b001_0110111: q = 4'b1000;
+		11'b001_0111000: q = 4'b1000;
+		11'b001_0111001: q = 4'b1000;
+		11'b001_0111010: q = 4'b1000;
+		11'b001_0111011: q = 4'b1000;
+		11'b001_0111100: q = 4'b1000;
+		11'b001_0111101: q = 4'b1000;
+		11'b001_0111110: q = 4'b1000;
+		11'b001_0111111: q = 4'b1000;
+		11'b001_1000000: q = 4'b0001;
+		11'b001_1000001: q = 4'b0001;
+		11'b001_1000010: q = 4'b0001;
+		11'b001_1000011: q = 4'b0001;
+		11'b001_1000100: q = 4'b0001;
+		11'b001_1000101: q = 4'b0001;
+		11'b001_1000110: q = 4'b0001;
+		11'b001_1000111: q = 4'b0001;
+		11'b001_1001000: q = 4'b0001;
+		11'b001_1001001: q = 4'b0001;
+		11'b001_1001010: q = 4'b0001;
+		11'b001_1001011: q = 4'b0001;
+		11'b001_1001100: q = 4'b0001;
+		11'b001_1001101: q = 4'b0001;
+		11'b001_1001110: q = 4'b0001;
+		11'b001_1001111: q = 4'b0001;
+		11'b001_1010000: q = 4'b0001;
+		11'b001_1010001: q = 4'b0001;
+		11'b001_1010010: q = 4'b0001;
+		11'b001_1010011: q = 4'b0001;
+		11'b001_1010100: q = 4'b0001;
+		11'b001_1010101: q = 4'b0001;
+		11'b001_1010110: q = 4'b0001;
+		11'b001_1010111: q = 4'b0001;
+		11'b001_1011000: q = 4'b0001;
+		11'b001_1011001: q = 4'b0001;
+		11'b001_1011010: q = 4'b0001;
+		11'b001_1011011: q = 4'b0001;
+		11'b001_1011100: q = 4'b0001;
+		11'b001_1011101: q = 4'b0001;
+		11'b001_1011110: q = 4'b0001;
+		11'b001_1011111: q = 4'b0001;
+		11'b001_1100000: q = 4'b0001;
+		11'b001_1100001: q = 4'b0001;
+		11'b001_1100010: q = 4'b0001;
+		11'b001_1100011: q = 4'b0001;
+		11'b001_1100100: q = 4'b0010;
+		11'b001_1100101: q = 4'b0010;
+		11'b001_1100110: q = 4'b0010;
+		11'b001_1100111: q = 4'b0010;
+		11'b001_1101000: q = 4'b0010;
+		11'b001_1101001: q = 4'b0010;
+		11'b001_1101010: q = 4'b0010;
+		11'b001_1101011: q = 4'b0010;
+		11'b001_1101100: q = 4'b0010;
+		11'b001_1101101: q = 4'b0010;
+		11'b001_1101110: q = 4'b0010;
+		11'b001_1101111: q = 4'b0010;
+		11'b001_1110000: q = 4'b0010;
+		11'b001_1110001: q = 4'b0010;
+		11'b001_1110010: q = 4'b0010;
+		11'b001_1110011: q = 4'b0010;
+		11'b001_1110100: q = 4'b0010;
+		11'b001_1110101: q = 4'b0010;
+		11'b001_1110110: q = 4'b0000;
+		11'b001_1110111: q = 4'b0000;
+		11'b001_1111000: q = 4'b0000;
+		11'b001_1111001: q = 4'b0000;
+		11'b001_1111010: q = 4'b0000;
+		11'b001_1111011: q = 4'b0000;
+		11'b001_1111100: q = 4'b0000;
+		11'b001_1111101: q = 4'b0000;
+		11'b001_1111110: q = 4'b0000;
+		11'b001_1111111: q = 4'b0000;
+		11'b010_0000000: q = 4'b0000;
+		11'b010_0000001: q = 4'b0000;
+		11'b010_0000010: q = 4'b0000;
+		11'b010_0000011: q = 4'b0000;
+		11'b010_0000100: q = 4'b0000;
+		11'b010_0000101: q = 4'b0000;
+		11'b010_0000110: q = 4'b0000;
+		11'b010_0000111: q = 4'b0000;
+		11'b010_0001000: q = 4'b0100;
+		11'b010_0001001: q = 4'b0100;
+		11'b010_0001010: q = 4'b0100;
+		11'b010_0001011: q = 4'b0100;
+		11'b010_0001100: q = 4'b0100;
+		11'b010_0001101: q = 4'b0100;
+		11'b010_0001110: q = 4'b0100;
+		11'b010_0001111: q = 4'b0100;
+		11'b010_0010000: q = 4'b0100;
+		11'b010_0010001: q = 4'b0100;
+		11'b010_0010010: q = 4'b0100;
+		11'b010_0010011: q = 4'b0100;
+		11'b010_0010100: q = 4'b0100;
+		11'b010_0010101: q = 4'b0100;
+		11'b010_0010110: q = 4'b0100;
+		11'b010_0010111: q = 4'b0100;
+		11'b010_0011000: q = 4'b0100;
+		11'b010_0011001: q = 4'b0100;
+		11'b010_0011010: q = 4'b0100;
+		11'b010_0011011: q = 4'b0100;
+		11'b010_0011100: q = 4'b0100;
+		11'b010_0011101: q = 4'b0100;
+		11'b010_0011110: q = 4'b0100;
+		11'b010_0011111: q = 4'b0100;
+		11'b010_0100000: q = 4'b1000;
+		11'b010_0100001: q = 4'b1000;
+		11'b010_0100010: q = 4'b1000;
+		11'b010_0100011: q = 4'b1000;
+		11'b010_0100100: q = 4'b1000;
+		11'b010_0100101: q = 4'b1000;
+		11'b010_0100110: q = 4'b1000;
+		11'b010_0100111: q = 4'b1000;
+		11'b010_0101000: q = 4'b1000;
+		11'b010_0101001: q = 4'b1000;
+		11'b010_0101010: q = 4'b1000;
+		11'b010_0101011: q = 4'b1000;
+		11'b010_0101100: q = 4'b1000;
+		11'b010_0101101: q = 4'b1000;
+		11'b010_0101110: q = 4'b1000;
+		11'b010_0101111: q = 4'b1000;
+		11'b010_0110000: q = 4'b1000;
+		11'b010_0110001: q = 4'b1000;
+		11'b010_0110010: q = 4'b1000;
+		11'b010_0110011: q = 4'b1000;
+		11'b010_0110100: q = 4'b1000;
+		11'b010_0110101: q = 4'b1000;
+		11'b010_0110110: q = 4'b1000;
+		11'b010_0110111: q = 4'b1000;
+		11'b010_0111000: q = 4'b1000;
+		11'b010_0111001: q = 4'b1000;
+		11'b010_0111010: q = 4'b1000;
+		11'b010_0111011: q = 4'b1000;
+		11'b010_0111100: q = 4'b1000;
+		11'b010_0111101: q = 4'b1000;
+		11'b010_0111110: q = 4'b1000;
+		11'b010_0111111: q = 4'b1000;
+		11'b010_1000000: q = 4'b0001;
+		11'b010_1000001: q = 4'b0001;
+		11'b010_1000010: q = 4'b0001;
+		11'b010_1000011: q = 4'b0001;
+		11'b010_1000100: q = 4'b0001;
+		11'b010_1000101: q = 4'b0001;
+		11'b010_1000110: q = 4'b0001;
+		11'b010_1000111: q = 4'b0001;
+		11'b010_1001000: q = 4'b0001;
+		11'b010_1001001: q = 4'b0001;
+		11'b010_1001010: q = 4'b0001;
+		11'b010_1001011: q = 4'b0001;
+		11'b010_1001100: q = 4'b0001;
+		11'b010_1001101: q = 4'b0001;
+		11'b010_1001110: q = 4'b0001;
+		11'b010_1001111: q = 4'b0001;
+		11'b010_1010000: q = 4'b0001;
+		11'b010_1010001: q = 4'b0001;
+		11'b010_1010010: q = 4'b0001;
+		11'b010_1010011: q = 4'b0001;
+		11'b010_1010100: q = 4'b0001;
+		11'b010_1010101: q = 4'b0001;
+		11'b010_1010110: q = 4'b0001;
+		11'b010_1010111: q = 4'b0001;
+		11'b010_1011000: q = 4'b0001;
+		11'b010_1011001: q = 4'b0001;
+		11'b010_1011010: q = 4'b0001;
+		11'b010_1011011: q = 4'b0001;
+		11'b010_1011100: q = 4'b0001;
+		11'b010_1011101: q = 4'b0001;
+		11'b010_1011110: q = 4'b0001;
+		11'b010_1011111: q = 4'b0001;
+		11'b010_1100000: q = 4'b0010;
+		11'b010_1100001: q = 4'b0010;
+		11'b010_1100010: q = 4'b0010;
+		11'b010_1100011: q = 4'b0010;
+		11'b010_1100100: q = 4'b0010;
+		11'b010_1100101: q = 4'b0010;
+		11'b010_1100110: q = 4'b0010;
+		11'b010_1100111: q = 4'b0010;
+		11'b010_1101000: q = 4'b0010;
+		11'b010_1101001: q = 4'b0010;
+		11'b010_1101010: q = 4'b0010;
+		11'b010_1101011: q = 4'b0010;
+		11'b010_1101100: q = 4'b0010;
+		11'b010_1101101: q = 4'b0010;
+		11'b010_1101110: q = 4'b0010;
+		11'b010_1101111: q = 4'b0010;
+		11'b010_1110000: q = 4'b0010;
+		11'b010_1110001: q = 4'b0010;
+		11'b010_1110010: q = 4'b0010;
+		11'b010_1110011: q = 4'b0010;
+		11'b010_1110100: q = 4'b0000;
+		11'b010_1110101: q = 4'b0000;
+		11'b010_1110110: q = 4'b0000;
+		11'b010_1110111: q = 4'b0000;
+		11'b010_1111000: q = 4'b0000;
+		11'b010_1111001: q = 4'b0000;
+		11'b010_1111010: q = 4'b0000;
+		11'b010_1111011: q = 4'b0000;
+		11'b010_1111100: q = 4'b0000;
+		11'b010_1111101: q = 4'b0000;
+		11'b010_1111110: q = 4'b0000;
+		11'b010_1111111: q = 4'b0000;
+		11'b011_0000000: q = 4'b0000;
+		11'b011_0000001: q = 4'b0000;
+		11'b011_0000010: q = 4'b0000;
+		11'b011_0000011: q = 4'b0000;
+		11'b011_0000100: q = 4'b0000;
+		11'b011_0000101: q = 4'b0000;
+		11'b011_0000110: q = 4'b0000;
+		11'b011_0000111: q = 4'b0000;
+		11'b011_0001000: q = 4'b0100;
+		11'b011_0001001: q = 4'b0100;
+		11'b011_0001010: q = 4'b0100;
+		11'b011_0001011: q = 4'b0100;
+		11'b011_0001100: q = 4'b0100;
+		11'b011_0001101: q = 4'b0100;
+		11'b011_0001110: q = 4'b0100;
+		11'b011_0001111: q = 4'b0100;
+		11'b011_0010000: q = 4'b0100;
+		11'b011_0010001: q = 4'b0100;
+		11'b011_0010010: q = 4'b0100;
+		11'b011_0010011: q = 4'b0100;
+		11'b011_0010100: q = 4'b0100;
+		11'b011_0010101: q = 4'b0100;
+		11'b011_0010110: q = 4'b0100;
+		11'b011_0010111: q = 4'b0100;
+		11'b011_0011000: q = 4'b0100;
+		11'b011_0011001: q = 4'b0100;
+		11'b011_0011010: q = 4'b0100;
+		11'b011_0011011: q = 4'b0100;
+		11'b011_0011100: q = 4'b0100;
+		11'b011_0011101: q = 4'b0100;
+		11'b011_0011110: q = 4'b0100;
+		11'b011_0011111: q = 4'b0100;
+		11'b011_0100000: q = 4'b1000;
+		11'b011_0100001: q = 4'b1000;
+		11'b011_0100010: q = 4'b1000;
+		11'b011_0100011: q = 4'b1000;
+		11'b011_0100100: q = 4'b1000;
+		11'b011_0100101: q = 4'b1000;
+		11'b011_0100110: q = 4'b1000;
+		11'b011_0100111: q = 4'b1000;
+		11'b011_0101000: q = 4'b1000;
+		11'b011_0101001: q = 4'b1000;
+		11'b011_0101010: q = 4'b1000;
+		11'b011_0101011: q = 4'b1000;
+		11'b011_0101100: q = 4'b1000;
+		11'b011_0101101: q = 4'b1000;
+		11'b011_0101110: q = 4'b1000;
+		11'b011_0101111: q = 4'b1000;
+		11'b011_0110000: q = 4'b1000;
+		11'b011_0110001: q = 4'b1000;
+		11'b011_0110010: q = 4'b1000;
+		11'b011_0110011: q = 4'b1000;
+		11'b011_0110100: q = 4'b1000;
+		11'b011_0110101: q = 4'b1000;
+		11'b011_0110110: q = 4'b1000;
+		11'b011_0110111: q = 4'b1000;
+		11'b011_0111000: q = 4'b1000;
+		11'b011_0111001: q = 4'b1000;
+		11'b011_0111010: q = 4'b1000;
+		11'b011_0111011: q = 4'b1000;
+		11'b011_0111100: q = 4'b1000;
+		11'b011_0111101: q = 4'b1000;
+		11'b011_0111110: q = 4'b1000;
+		11'b011_0111111: q = 4'b1000;
+		11'b011_1000000: q = 4'b0001;
+		11'b011_1000001: q = 4'b0001;
+		11'b011_1000010: q = 4'b0001;
+		11'b011_1000011: q = 4'b0001;
+		11'b011_1000100: q = 4'b0001;
+		11'b011_1000101: q = 4'b0001;
+		11'b011_1000110: q = 4'b0001;
+		11'b011_1000111: q = 4'b0001;
+		11'b011_1001000: q = 4'b0001;
+		11'b011_1001001: q = 4'b0001;
+		11'b011_1001010: q = 4'b0001;
+		11'b011_1001011: q = 4'b0001;
+		11'b011_1001100: q = 4'b0001;
+		11'b011_1001101: q = 4'b0001;
+		11'b011_1001110: q = 4'b0001;
+		11'b011_1001111: q = 4'b0001;
+		11'b011_1010000: q = 4'b0001;
+		11'b011_1010001: q = 4'b0001;
+		11'b011_1010010: q = 4'b0001;
+		11'b011_1010011: q = 4'b0001;
+		11'b011_1010100: q = 4'b0001;
+		11'b011_1010101: q = 4'b0001;
+		11'b011_1010110: q = 4'b0001;
+		11'b011_1010111: q = 4'b0001;
+		11'b011_1011000: q = 4'b0001;
+		11'b011_1011001: q = 4'b0001;
+		11'b011_1011010: q = 4'b0001;
+		11'b011_1011011: q = 4'b0001;
+		11'b011_1011100: q = 4'b0001;
+		11'b011_1011101: q = 4'b0001;
+		11'b011_1011110: q = 4'b0010;
+		11'b011_1011111: q = 4'b0010;
+		11'b011_1100000: q = 4'b0010;
+		11'b011_1100001: q = 4'b0010;
+		11'b011_1100010: q = 4'b0010;
+		11'b011_1100011: q = 4'b0010;
+		11'b011_1100100: q = 4'b0010;
+		11'b011_1100101: q = 4'b0010;
+		11'b011_1100110: q = 4'b0010;
+		11'b011_1100111: q = 4'b0010;
+		11'b011_1101000: q = 4'b0010;
+		11'b011_1101001: q = 4'b0010;
+		11'b011_1101010: q = 4'b0010;
+		11'b011_1101011: q = 4'b0010;
+		11'b011_1101100: q = 4'b0010;
+		11'b011_1101101: q = 4'b0010;
+		11'b011_1101110: q = 4'b0010;
+		11'b011_1101111: q = 4'b0010;
+		11'b011_1110000: q = 4'b0010;
+		11'b011_1110001: q = 4'b0010;
+		11'b011_1110010: q = 4'b0010;
+		11'b011_1110011: q = 4'b0010;
+		11'b011_1110100: q = 4'b0000;
+		11'b011_1110101: q = 4'b0000;
+		11'b011_1110110: q = 4'b0000;
+		11'b011_1110111: q = 4'b0000;
+		11'b011_1111000: q = 4'b0000;
+		11'b011_1111001: q = 4'b0000;
+		11'b011_1111010: q = 4'b0000;
+		11'b011_1111011: q = 4'b0000;
+		11'b011_1111100: q = 4'b0000;
+		11'b011_1111101: q = 4'b0000;
+		11'b011_1111110: q = 4'b0000;
+		11'b011_1111111: q = 4'b0000;
+		11'b100_0000000: q = 4'b0000;
+		11'b100_0000001: q = 4'b0000;
+		11'b100_0000010: q = 4'b0000;
+		11'b100_0000011: q = 4'b0000;
+		11'b100_0000100: q = 4'b0000;
+		11'b100_0000101: q = 4'b0000;
+		11'b100_0000110: q = 4'b0000;
+		11'b100_0000111: q = 4'b0000;
+		11'b100_0001000: q = 4'b0000;
+		11'b100_0001001: q = 4'b0000;
+		11'b100_0001010: q = 4'b0000;
+		11'b100_0001011: q = 4'b0000;
+		11'b100_0001100: q = 4'b0100;
+		11'b100_0001101: q = 4'b0100;
+		11'b100_0001110: q = 4'b0100;
+		11'b100_0001111: q = 4'b0100;
+		11'b100_0010000: q = 4'b0100;
+		11'b100_0010001: q = 4'b0100;
+		11'b100_0010010: q = 4'b0100;
+		11'b100_0010011: q = 4'b0100;
+		11'b100_0010100: q = 4'b0100;
+		11'b100_0010101: q = 4'b0100;
+		11'b100_0010110: q = 4'b0100;
+		11'b100_0010111: q = 4'b0100;
+		11'b100_0011000: q = 4'b0100;
+		11'b100_0011001: q = 4'b0100;
+		11'b100_0011010: q = 4'b0100;
+		11'b100_0011011: q = 4'b0100;
+		11'b100_0011100: q = 4'b0100;
+		11'b100_0011101: q = 4'b0100;
+		11'b100_0011110: q = 4'b0100;
+		11'b100_0011111: q = 4'b0100;
+		11'b100_0100000: q = 4'b0100;
+		11'b100_0100001: q = 4'b0100;
+		11'b100_0100010: q = 4'b0100;
+		11'b100_0100011: q = 4'b0100;
+		11'b100_0100100: q = 4'b1000;
+		11'b100_0100101: q = 4'b1000;
+		11'b100_0100110: q = 4'b1000;
+		11'b100_0100111: q = 4'b1000;
+		11'b100_0101000: q = 4'b1000;
+		11'b100_0101001: q = 4'b1000;
+		11'b100_0101010: q = 4'b1000;
+		11'b100_0101011: q = 4'b1000;
+		11'b100_0101100: q = 4'b1000;
+		11'b100_0101101: q = 4'b1000;
+		11'b100_0101110: q = 4'b1000;
+		11'b100_0101111: q = 4'b1000;
+		11'b100_0110000: q = 4'b1000;
+		11'b100_0110001: q = 4'b1000;
+		11'b100_0110010: q = 4'b1000;
+		11'b100_0110011: q = 4'b1000;
+		11'b100_0110100: q = 4'b1000;
+		11'b100_0110101: q = 4'b1000;
+		11'b100_0110110: q = 4'b1000;
+		11'b100_0110111: q = 4'b1000;
+		11'b100_0111000: q = 4'b1000;
+		11'b100_0111001: q = 4'b1000;
+		11'b100_0111010: q = 4'b1000;
+		11'b100_0111011: q = 4'b1000;
+		11'b100_0111100: q = 4'b1000;
+		11'b100_0111101: q = 4'b1000;
+		11'b100_0111110: q = 4'b1000;
+		11'b100_0111111: q = 4'b1000;
+		11'b100_1000000: q = 4'b0001;
+		11'b100_1000001: q = 4'b0001;
+		11'b100_1000010: q = 4'b0001;
+		11'b100_1000011: q = 4'b0001;
+		11'b100_1000100: q = 4'b0001;
+		11'b100_1000101: q = 4'b0001;
+		11'b100_1000110: q = 4'b0001;
+		11'b100_1000111: q = 4'b0001;
+		11'b100_1001000: q = 4'b0001;
+		11'b100_1001001: q = 4'b0001;
+		11'b100_1001010: q = 4'b0001;
+		11'b100_1001011: q = 4'b0001;
+		11'b100_1001100: q = 4'b0001;
+		11'b100_1001101: q = 4'b0001;
+		11'b100_1001110: q = 4'b0001;
+		11'b100_1001111: q = 4'b0001;
+		11'b100_1010000: q = 4'b0001;
+		11'b100_1010001: q = 4'b0001;
+		11'b100_1010010: q = 4'b0001;
+		11'b100_1010011: q = 4'b0001;
+		11'b100_1010100: q = 4'b0001;
+		11'b100_1010101: q = 4'b0001;
+		11'b100_1010110: q = 4'b0001;
+		11'b100_1010111: q = 4'b0001;
+		11'b100_1011000: q = 4'b0001;
+		11'b100_1011001: q = 4'b0001;
+		11'b100_1011010: q = 4'b0001;
+		11'b100_1011011: q = 4'b0001;
+		11'b100_1011100: q = 4'b0010;
+		11'b100_1011101: q = 4'b0010;
+		11'b100_1011110: q = 4'b0010;
+		11'b100_1011111: q = 4'b0010;
+		11'b100_1100000: q = 4'b0010;
+		11'b100_1100001: q = 4'b0010;
+		11'b100_1100010: q = 4'b0010;
+		11'b100_1100011: q = 4'b0010;
+		11'b100_1100100: q = 4'b0010;
+		11'b100_1100101: q = 4'b0010;
+		11'b100_1100110: q = 4'b0010;
+		11'b100_1100111: q = 4'b0010;
+		11'b100_1101000: q = 4'b0010;
+		11'b100_1101001: q = 4'b0010;
+		11'b100_1101010: q = 4'b0010;
+		11'b100_1101011: q = 4'b0010;
+		11'b100_1101100: q = 4'b0010;
+		11'b100_1101101: q = 4'b0010;
+		11'b100_1101110: q = 4'b0010;
+		11'b100_1101111: q = 4'b0010;
+		11'b100_1110000: q = 4'b0010;
+		11'b100_1110001: q = 4'b0010;
+		11'b100_1110010: q = 4'b0010;
+		11'b100_1110011: q = 4'b0010;
+		11'b100_1110100: q = 4'b0000;
+		11'b100_1110101: q = 4'b0000;
+		11'b100_1110110: q = 4'b0000;
+		11'b100_1110111: q = 4'b0000;
+		11'b100_1111000: q = 4'b0000;
+		11'b100_1111001: q = 4'b0000;
+		11'b100_1111010: q = 4'b0000;
+		11'b100_1111011: q = 4'b0000;
+		11'b100_1111100: q = 4'b0000;
+		11'b100_1111101: q = 4'b0000;
+		11'b100_1111110: q = 4'b0000;
+		11'b100_1111111: q = 4'b0000;
+		11'b101_0000000: q = 4'b0000;
+		11'b101_0000001: q = 4'b0000;
+		11'b101_0000010: q = 4'b0000;
+		11'b101_0000011: q = 4'b0000;
+		11'b101_0000100: q = 4'b0000;
+		11'b101_0000101: q = 4'b0000;
+		11'b101_0000110: q = 4'b0000;
+		11'b101_0000111: q = 4'b0000;
+		11'b101_0001000: q = 4'b0000;
+		11'b101_0001001: q = 4'b0000;
+		11'b101_0001010: q = 4'b0000;
+		11'b101_0001011: q = 4'b0000;
+		11'b101_0001100: q = 4'b0100;
+		11'b101_0001101: q = 4'b0100;
+		11'b101_0001110: q = 4'b0100;
+		11'b101_0001111: q = 4'b0100;
+		11'b101_0010000: q = 4'b0100;
+		11'b101_0010001: q = 4'b0100;
+		11'b101_0010010: q = 4'b0100;
+		11'b101_0010011: q = 4'b0100;
+		11'b101_0010100: q = 4'b0100;
+		11'b101_0010101: q = 4'b0100;
+		11'b101_0010110: q = 4'b0100;
+		11'b101_0010111: q = 4'b0100;
+		11'b101_0011000: q = 4'b0100;
+		11'b101_0011001: q = 4'b0100;
+		11'b101_0011010: q = 4'b0100;
+		11'b101_0011011: q = 4'b0100;
+		11'b101_0011100: q = 4'b0100;
+		11'b101_0011101: q = 4'b0100;
+		11'b101_0011110: q = 4'b0100;
+		11'b101_0011111: q = 4'b0100;
+		11'b101_0100000: q = 4'b0100;
+		11'b101_0100001: q = 4'b0100;
+		11'b101_0100010: q = 4'b0100;
+		11'b101_0100011: q = 4'b0100;
+		11'b101_0100100: q = 4'b0100;
+		11'b101_0100101: q = 4'b0100;
+		11'b101_0100110: q = 4'b0100;
+		11'b101_0100111: q = 4'b0100;
+		11'b101_0101000: q = 4'b1000;
+		11'b101_0101001: q = 4'b1000;
+		11'b101_0101010: q = 4'b1000;
+		11'b101_0101011: q = 4'b1000;
+		11'b101_0101100: q = 4'b1000;
+		11'b101_0101101: q = 4'b1000;
+		11'b101_0101110: q = 4'b1000;
+		11'b101_0101111: q = 4'b1000;
+		11'b101_0110000: q = 4'b1000;
+		11'b101_0110001: q = 4'b1000;
+		11'b101_0110010: q = 4'b1000;
+		11'b101_0110011: q = 4'b1000;
+		11'b101_0110100: q = 4'b1000;
+		11'b101_0110101: q = 4'b1000;
+		11'b101_0110110: q = 4'b1000;
+		11'b101_0110111: q = 4'b1000;
+		11'b101_0111000: q = 4'b1000;
+		11'b101_0111001: q = 4'b1000;
+		11'b101_0111010: q = 4'b1000;
+		11'b101_0111011: q = 4'b1000;
+		11'b101_0111100: q = 4'b1000;
+		11'b101_0111101: q = 4'b1000;
+		11'b101_0111110: q = 4'b1000;
+		11'b101_0111111: q = 4'b1000;
+		11'b101_1000000: q = 4'b0001;
+		11'b101_1000001: q = 4'b0001;
+		11'b101_1000010: q = 4'b0001;
+		11'b101_1000011: q = 4'b0001;
+		11'b101_1000100: q = 4'b0001;
+		11'b101_1000101: q = 4'b0001;
+		11'b101_1000110: q = 4'b0001;
+		11'b101_1000111: q = 4'b0001;
+		11'b101_1001000: q = 4'b0001;
+		11'b101_1001001: q = 4'b0001;
+		11'b101_1001010: q = 4'b0001;
+		11'b101_1001011: q = 4'b0001;
+		11'b101_1001100: q = 4'b0001;
+		11'b101_1001101: q = 4'b0001;
+		11'b101_1001110: q = 4'b0001;
+		11'b101_1001111: q = 4'b0001;
+		11'b101_1010000: q = 4'b0001;
+		11'b101_1010001: q = 4'b0001;
+		11'b101_1010010: q = 4'b0001;
+		11'b101_1010011: q = 4'b0001;
+		11'b101_1010100: q = 4'b0001;
+		11'b101_1010101: q = 4'b0001;
+		11'b101_1010110: q = 4'b0001;
+		11'b101_1010111: q = 4'b0001;
+		11'b101_1011000: q = 4'b0010;
+		11'b101_1011001: q = 4'b0010;
+		11'b101_1011010: q = 4'b0010;
+		11'b101_1011011: q = 4'b0010;
+		11'b101_1011100: q = 4'b0010;
+		11'b101_1011101: q = 4'b0010;
+		11'b101_1011110: q = 4'b0010;
+		11'b101_1011111: q = 4'b0010;
+		11'b101_1100000: q = 4'b0010;
+		11'b101_1100001: q = 4'b0010;
+		11'b101_1100010: q = 4'b0010;
+		11'b101_1100011: q = 4'b0010;
+		11'b101_1100100: q = 4'b0010;
+		11'b101_1100101: q = 4'b0010;
+		11'b101_1100110: q = 4'b0010;
+		11'b101_1100111: q = 4'b0010;
+		11'b101_1101000: q = 4'b0010;
+		11'b101_1101001: q = 4'b0010;
+		11'b101_1101010: q = 4'b0010;
+		11'b101_1101011: q = 4'b0010;
+		11'b101_1101100: q = 4'b0010;
+		11'b101_1101101: q = 4'b0010;
+		11'b101_1101110: q = 4'b0010;
+		11'b101_1101111: q = 4'b0010;
+		11'b101_1110000: q = 4'b0000;
+		11'b101_1110001: q = 4'b0000;
+		11'b101_1110010: q = 4'b0000;
+		11'b101_1110011: q = 4'b0000;
+		11'b101_1110100: q = 4'b0000;
+		11'b101_1110101: q = 4'b0000;
+		11'b101_1110110: q = 4'b0000;
+		11'b101_1110111: q = 4'b0000;
+		11'b101_1111000: q = 4'b0000;
+		11'b101_1111001: q = 4'b0000;
+		11'b101_1111010: q = 4'b0000;
+		11'b101_1111011: q = 4'b0000;
+		11'b101_1111100: q = 4'b0000;
+		11'b101_1111101: q = 4'b0000;
+		11'b101_1111110: q = 4'b0000;
+		11'b101_1111111: q = 4'b0000;
+		11'b110_0000000: q = 4'b0000;
+		11'b110_0000001: q = 4'b0000;
+		11'b110_0000010: q = 4'b0000;
+		11'b110_0000011: q = 4'b0000;
+		11'b110_0000100: q = 4'b0000;
+		11'b110_0000101: q = 4'b0000;
+		11'b110_0000110: q = 4'b0000;
+		11'b110_0000111: q = 4'b0000;
+		11'b110_0001000: q = 4'b0000;
+		11'b110_0001001: q = 4'b0000;
+		11'b110_0001010: q = 4'b0000;
+		11'b110_0001011: q = 4'b0000;
+		11'b110_0001100: q = 4'b0000;
+		11'b110_0001101: q = 4'b0000;
+		11'b110_0001110: q = 4'b0000;
+		11'b110_0001111: q = 4'b0000;
+		11'b110_0010000: q = 4'b0100;
+		11'b110_0010001: q = 4'b0100;
+		11'b110_0010010: q = 4'b0100;
+		11'b110_0010011: q = 4'b0100;
+		11'b110_0010100: q = 4'b0100;
+		11'b110_0010101: q = 4'b0100;
+		11'b110_0010110: q = 4'b0100;
+		11'b110_0010111: q = 4'b0100;
+		11'b110_0011000: q = 4'b0100;
+		11'b110_0011001: q = 4'b0100;
+		11'b110_0011010: q = 4'b0100;
+		11'b110_0011011: q = 4'b0100;
+		11'b110_0011100: q = 4'b0100;
+		11'b110_0011101: q = 4'b0100;
+		11'b110_0011110: q = 4'b0100;
+		11'b110_0011111: q = 4'b0100;
+		11'b110_0100000: q = 4'b0100;
+		11'b110_0100001: q = 4'b0100;
+		11'b110_0100010: q = 4'b0100;
+		11'b110_0100011: q = 4'b0100;
+		11'b110_0100100: q = 4'b0100;
+		11'b110_0100101: q = 4'b0100;
+		11'b110_0100110: q = 4'b0100;
+		11'b110_0100111: q = 4'b0100;
+		11'b110_0101000: q = 4'b1000;
+		11'b110_0101001: q = 4'b1000;
+		11'b110_0101010: q = 4'b1000;
+		11'b110_0101011: q = 4'b1000;
+		11'b110_0101100: q = 4'b1000;
+		11'b110_0101101: q = 4'b1000;
+		11'b110_0101110: q = 4'b1000;
+		11'b110_0101111: q = 4'b1000;
+		11'b110_0110000: q = 4'b1000;
+		11'b110_0110001: q = 4'b1000;
+		11'b110_0110010: q = 4'b1000;
+		11'b110_0110011: q = 4'b1000;
+		11'b110_0110100: q = 4'b1000;
+		11'b110_0110101: q = 4'b1000;
+		11'b110_0110110: q = 4'b1000;
+		11'b110_0110111: q = 4'b1000;
+		11'b110_0111000: q = 4'b1000;
+		11'b110_0111001: q = 4'b1000;
+		11'b110_0111010: q = 4'b1000;
+		11'b110_0111011: q = 4'b1000;
+		11'b110_0111100: q = 4'b1000;
+		11'b110_0111101: q = 4'b1000;
+		11'b110_0111110: q = 4'b1000;
+		11'b110_0111111: q = 4'b1000;
+		11'b110_1000000: q = 4'b0001;
+		11'b110_1000001: q = 4'b0001;
+		11'b110_1000010: q = 4'b0001;
+		11'b110_1000011: q = 4'b0001;
+		11'b110_1000100: q = 4'b0001;
+		11'b110_1000101: q = 4'b0001;
+		11'b110_1000110: q = 4'b0001;
+		11'b110_1000111: q = 4'b0001;
+		11'b110_1001000: q = 4'b0001;
+		11'b110_1001001: q = 4'b0001;
+		11'b110_1001010: q = 4'b0001;
+		11'b110_1001011: q = 4'b0001;
+		11'b110_1001100: q = 4'b0001;
+		11'b110_1001101: q = 4'b0001;
+		11'b110_1001110: q = 4'b0001;
+		11'b110_1001111: q = 4'b0001;
+		11'b110_1010000: q = 4'b0001;
+		11'b110_1010001: q = 4'b0001;
+		11'b110_1010010: q = 4'b0001;
+		11'b110_1010011: q = 4'b0001;
+		11'b110_1010100: q = 4'b0010;
+		11'b110_1010101: q = 4'b0010;
+		11'b110_1010110: q = 4'b0010;
+		11'b110_1010111: q = 4'b0010;
+		11'b110_1011000: q = 4'b0010;
+		11'b110_1011001: q = 4'b0010;
+		11'b110_1011010: q = 4'b0010;
+		11'b110_1011011: q = 4'b0010;
+		11'b110_1011100: q = 4'b0010;
+		11'b110_1011101: q = 4'b0010;
+		11'b110_1011110: q = 4'b0010;
+		11'b110_1011111: q = 4'b0010;
+		11'b110_1100000: q = 4'b0010;
+		11'b110_1100001: q = 4'b0010;
+		11'b110_1100010: q = 4'b0010;
+		11'b110_1100011: q = 4'b0010;
+		11'b110_1100100: q = 4'b0010;
+		11'b110_1100101: q = 4'b0010;
+		11'b110_1100110: q = 4'b0010;
+		11'b110_1100111: q = 4'b0010;
+		11'b110_1101000: q = 4'b0010;
+		11'b110_1101001: q = 4'b0010;
+		11'b110_1101010: q = 4'b0010;
+		11'b110_1101011: q = 4'b0010;
+		11'b110_1101100: q = 4'b0010;
+		11'b110_1101101: q = 4'b0010;
+		11'b110_1101110: q = 4'b0010;
+		11'b110_1101111: q = 4'b0010;
+		11'b110_1110000: q = 4'b0000;
+		11'b110_1110001: q = 4'b0000;
+		11'b110_1110010: q = 4'b0000;
+		11'b110_1110011: q = 4'b0000;
+		11'b110_1110100: q = 4'b0000;
+		11'b110_1110101: q = 4'b0000;
+		11'b110_1110110: q = 4'b0000;
+		11'b110_1110111: q = 4'b0000;
+		11'b110_1111000: q = 4'b0000;
+		11'b110_1111001: q = 4'b0000;
+		11'b110_1111010: q = 4'b0000;
+		11'b110_1111011: q = 4'b0000;
+		11'b110_1111100: q = 4'b0000;
+		11'b110_1111101: q = 4'b0000;
+		11'b110_1111110: q = 4'b0000;
+		11'b110_1111111: q = 4'b0000;
+		11'b111_0000000: q = 4'b0000;
+		11'b111_0000001: q = 4'b0000;
+		11'b111_0000010: q = 4'b0000;
+		11'b111_0000011: q = 4'b0000;
+		11'b111_0000100: q = 4'b0000;
+		11'b111_0000101: q = 4'b0000;
+		11'b111_0000110: q = 4'b0000;
+		11'b111_0000111: q = 4'b0000;
+		11'b111_0001000: q = 4'b0000;
+		11'b111_0001001: q = 4'b0000;
+		11'b111_0001010: q = 4'b0000;
+		11'b111_0001011: q = 4'b0000;
+		11'b111_0001100: q = 4'b0000;
+		11'b111_0001101: q = 4'b0000;
+		11'b111_0001110: q = 4'b0000;
+		11'b111_0001111: q = 4'b0000;
+		11'b111_0010000: q = 4'b0100;
+		11'b111_0010001: q = 4'b0100;
+		11'b111_0010010: q = 4'b0100;
+		11'b111_0010011: q = 4'b0100;
+		11'b111_0010100: q = 4'b0100;
+		11'b111_0010101: q = 4'b0100;
+		11'b111_0010110: q = 4'b0100;
+		11'b111_0010111: q = 4'b0100;
+		11'b111_0011000: q = 4'b0100;
+		11'b111_0011001: q = 4'b0100;
+		11'b111_0011010: q = 4'b0100;
+		11'b111_0011011: q = 4'b0100;
+		11'b111_0011100: q = 4'b0100;
+		11'b111_0011101: q = 4'b0100;
+		11'b111_0011110: q = 4'b0100;
+		11'b111_0011111: q = 4'b0100;
+		11'b111_0100000: q = 4'b0100;
+		11'b111_0100001: q = 4'b0100;
+		11'b111_0100010: q = 4'b0100;
+		11'b111_0100011: q = 4'b0100;
+		11'b111_0100100: q = 4'b0100;
+		11'b111_0100101: q = 4'b0100;
+		11'b111_0100110: q = 4'b0100;
+		11'b111_0100111: q = 4'b0100;
+		11'b111_0101000: q = 4'b0100;
+		11'b111_0101001: q = 4'b0100;
+		11'b111_0101010: q = 4'b0100;
+		11'b111_0101011: q = 4'b0100;
+		11'b111_0101100: q = 4'b1000;
+		11'b111_0101101: q = 4'b1000;
+		11'b111_0101110: q = 4'b1000;
+		11'b111_0101111: q = 4'b1000;
+		11'b111_0110000: q = 4'b1000;
+		11'b111_0110001: q = 4'b1000;
+		11'b111_0110010: q = 4'b1000;
+		11'b111_0110011: q = 4'b1000;
+		11'b111_0110100: q = 4'b1000;
+		11'b111_0110101: q = 4'b1000;
+		11'b111_0110110: q = 4'b1000;
+		11'b111_0110111: q = 4'b1000;
+		11'b111_0111000: q = 4'b1000;
+		11'b111_0111001: q = 4'b1000;
+		11'b111_0111010: q = 4'b1000;
+		11'b111_0111011: q = 4'b1000;
+		11'b111_0111100: q = 4'b1000;
+		11'b111_0111101: q = 4'b1000;
+		11'b111_0111110: q = 4'b1000;
+		11'b111_0111111: q = 4'b1000;
+		11'b111_1000000: q = 4'b0001;
+		11'b111_1000001: q = 4'b0001;
+		11'b111_1000010: q = 4'b0001;
+		11'b111_1000011: q = 4'b0001;
+		11'b111_1000100: q = 4'b0001;
+		11'b111_1000101: q = 4'b0001;
+		11'b111_1000110: q = 4'b0001;
+		11'b111_1000111: q = 4'b0001;
+		11'b111_1001000: q = 4'b0001;
+		11'b111_1001001: q = 4'b0001;
+		11'b111_1001010: q = 4'b0001;
+		11'b111_1001011: q = 4'b0001;
+		11'b111_1001100: q = 4'b0001;
+		11'b111_1001101: q = 4'b0001;
+		11'b111_1001110: q = 4'b0001;
+		11'b111_1001111: q = 4'b0001;
+		11'b111_1010000: q = 4'b0001;
+		11'b111_1010001: q = 4'b0001;
+		11'b111_1010010: q = 4'b0010;
+		11'b111_1010011: q = 4'b0010;
+		11'b111_1010100: q = 4'b0010;
+		11'b111_1010101: q = 4'b0010;
+		11'b111_1010110: q = 4'b0010;
+		11'b111_1010111: q = 4'b0010;
+		11'b111_1011000: q = 4'b0010;
+		11'b111_1011001: q = 4'b0010;
+		11'b111_1011010: q = 4'b0010;
+		11'b111_1011011: q = 4'b0010;
+		11'b111_1011100: q = 4'b0010;
+		11'b111_1011101: q = 4'b0010;
+		11'b111_1011110: q = 4'b0010;
+		11'b111_1011111: q = 4'b0010;
+		11'b111_1100000: q = 4'b0010;
+		11'b111_1100001: q = 4'b0010;
+		11'b111_1100010: q = 4'b0010;
+		11'b111_1100011: q = 4'b0010;
+		11'b111_1100100: q = 4'b0010;
+		11'b111_1100101: q = 4'b0010;
+		11'b111_1100110: q = 4'b0010;
+		11'b111_1100111: q = 4'b0010;
+		11'b111_1101000: q = 4'b0010;
+		11'b111_1101001: q = 4'b0010;
+		11'b111_1101010: q = 4'b0010;
+		11'b111_1101011: q = 4'b0010;
+		11'b111_1101100: q = 4'b0010;
+		11'b111_1101101: q = 4'b0010;
+		11'b111_1101110: q = 4'b0010;
+		11'b111_1101111: q = 4'b0010;
+		11'b111_1110000: q = 4'b0000;
+		11'b111_1110001: q = 4'b0000;
+		11'b111_1110010: q = 4'b0000;
+		11'b111_1110011: q = 4'b0000;
+		11'b111_1110100: q = 4'b0000;
+		11'b111_1110101: q = 4'b0000;
+		11'b111_1110110: q = 4'b0000;
+		11'b111_1110111: q = 4'b0000;
+		11'b111_1111000: q = 4'b0000;
+		11'b111_1111001: q = 4'b0000;
+		11'b111_1111010: q = 4'b0000;
+		11'b111_1111011: q = 4'b0000;
+		11'b111_1111100: q = 4'b0000;
+		11'b111_1111101: q = 4'b0000;
+		11'b111_1111110: q = 4'b0000;
+		11'b111_1111111: q = 4'b0000;
+	endcase
diff --git a/pipelined/srt/srt-radix4.do b/pipelined/srt/srt-radix4.do
index b213aa994..07dedfbfe 100644
--- a/pipelined/srt/srt-radix4.do
+++ b/pipelined/srt/srt-radix4.do
@@ -17,7 +17,7 @@ if [file exists work] {
 }
 vlib work
 
-vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
+vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
 vopt +acc work.testbenchradix4 -o workopt 
 vsim workopt
 
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index ccb6453c0..8fd8d5419 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -30,42 +30,35 @@
 
 `include "wally-config.vh"
 
-`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
-
 module srtradix4 (
   input  logic clk,
-  input  logic Start, 
-  input  logic Stall, // *** multiple pipe stages
-  input  logic Flush, // *** multiple pipe stages
-  // Floating Point Inputs
-  // later add exponents, signs, special cases
-  input  logic       XSign, YSign,
-  input  logic [`NE-1:0] XExp, YExp,
-  input  logic [`NF-1:0] XFrac, YFrac,
+  input  logic DivStart, 
+  input  logic [`NE-1:0] XExpE, YExpE,
+  input  logic [`NF:0] XManE, YManE,
   input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
+  input  logic XZeroE,
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
-  output logic       rsign,
-  output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
-  output logic [`NE-1:0] rExp,
-  output logic [3:0] Flags
+  output logic       DivDone,
+  output logic [`DIVLEN+2:0] Quot,
+  output logic [`XLEN-1:0] Rem, // *** later handle integers
+  output logic [`NE:0] DivCalcExpE
 );
 
   // logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [3:0]     q;
-  logic [`NE-1:0] calcExp;
-  logic           calcSign;
-  logic [`DIVLEN-1:0]  X, Dpreproc;
+  logic [`NE:0] DivCalcExp;
+  logic [`DIVLEN:0]    X;
+  logic [`DIVLEN-1:0]  Dpreproc;
   logic [`DIVLEN+3:0]  WS, WSA, WSN;
   logic [`DIVLEN+3:0]  WC, WCA, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
  
-  srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
+  srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -77,11 +70,11 @@ module srtradix4 (
   //  - otherwise load WSA into the flipflop
   //  *** what does N and A stand for?
   //  *** change shift amount for radix4
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
+  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
-  flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
+  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@@ -94,9 +87,8 @@ module srtradix4 (
 	// 0001 = -2
   qsel4 qsel4(.D, .WS, .WC, .q);
 
-  // Store the expoenent and sign until division is done
-  flopen #(`NE) expflop(clk, Start, calcExp, rExp);
-  flopen #(1) signflop(clk, Start, calcSign, rsign);
+  // Store the exponent until division is done
+  flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
 
   // Divisor Selection logic
   // *** radix 4 change to choose -2 to 2
@@ -120,11 +112,11 @@ module srtradix4 (
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
   
   //*** change for radix 4
-  otfc4  #(`DIVLEN) otfc4(clk, Start, q, Quot);
+  otfc4 otfc4(.clk, .DivStart, .q, .Quot);
 
-  expcalc expcalc(.XExp, .YExp, .calcExp);
+  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
 
-  signcalc signcalc(.XSign, .YSign, .calcSign);
+  divcounter divcounter(clk, DivStart, DivDone);
 
 endmodule
 
@@ -132,91 +124,154 @@ endmodule
 // Submodules //
 ////////////////
 
+/////////////
+// counter //
+/////////////
+module divcounter(input  logic clk,
+                  input  logic DivStart,
+                  output logic DivDone);
+
+  logic [5:0] count; // cycles since DivStart; 6 bits, so `DIVLEN/2+1 must not exceed 63
+
+  // Sequences the divider: DivStart clears count, DivDone is set when
+  // count reaches `DIVLEN/2+1 and is cleared again on the following cycle.
+  // DivDone is written only with nonblocking assignments. Mixing in a
+  // blocking "DivDone = 0" would clear it at the very clock edge where
+  // other posedge blocks sample it, creating a simulation race.
+
+  always_ff @(posedge clk)
+    begin
+      // the count match has priority; otherwise clear on restart or after the pulse
+      if      (count == `DIVLEN/2+1) DivDone <= #1 1;
+      else if (DivDone | DivStart)   DivDone <= #1 0;
+      if (DivStart) count <= #1 0;
+      else          count <= #1 count+1;
+    end
+endmodule
+
+module qsel4 (
+  input logic [`DIVLEN+3:0] D,
+  input logic [`DIVLEN+3:0] WS, WC,
+  output logic [3:0] q
+);
+  // Radix-4 quotient digit selection implemented as a table lookup.
+  // Index fields of the 1024-entry table, {Dmsbs, Wmsbs}:
+  //   Dmsbs - D[`DIVLEN-1:`DIVLEN-3]; with D = 0001.xxx... these are the
+  //           first three bits after the binary point
+  //   Wmsbs - upper seven bits of the 8-bit sum of the top eight bits
+  //           of WS and WC (W = xxxx.xxx..., so four integer and three
+  //           fraction bits)
+  //
+  // Each entry is the 4-bit digit code driven onto q. Codes, from the
+  // highest remainder band to the lowest: 1000, 0100, 0000, 0010, 0001
+  // (NOTE(review): presumably +2, +1, 0, -1, -2 - confirm against the consumer).
+  logic [2:0] Dmsbs;
+  logic [7:0] PreWmsbs;
+  logic [6:0] Wmsbs;
+  logic [3:0] QSel4[1023:0];
+
+  assign Dmsbs    = D[`DIVLEN-1:`DIVLEN-3];
+  assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
+  assign Wmsbs    = PreWmsbs[7:1];
+  assign q        = QSel4[{Dmsbs,Wmsbs}];
+
+  // The table is filled once at time zero. For each of the eight divisor
+  // intervals, four thresholds split the signed 7-bit remainder estimate
+  // into five bands, one per digit code.
+  initial begin
+    integer dIdx, wIdx, est;
+    integer min1000, min0100, min0000, min0010; // lowest estimate in each band
+    for (dIdx = 0; dIdx < 8; dIdx++) begin
+      case (dIdx)
+        0: begin
+          min1000 = 12; min0100 = 4; min0000 = -4; min0010 = -13;
+        end
+        1: begin
+          min1000 = 14; min0100 = 4; min0000 = -6; min0010 = -15;
+        end
+        2: begin
+          min1000 = 15; min0100 = 4; min0000 = -6; min0010 = -16;
+        end
+        3: begin
+          min1000 = 16; min0100 = 4; min0000 = -6; min0010 = -18;
+        end
+        4: begin
+          min1000 = 18; min0100 = 6; min0000 = -8; min0010 = -20;
+        end
+        5: begin
+          min1000 = 20; min0100 = 6; min0000 = -8; min0010 = -20;
+        end
+        6: begin
+          min1000 = 20; min0100 = 8; min0000 = -8; min0010 = -22;
+        end
+        7: begin
+          min1000 = 24; min0100 = 8; min0000 = -8; min0010 = -24;
+        end
+      endcase
+      for (wIdx = 0; wIdx < 128; wIdx++) begin
+        // indices 64..127 stand for the negative estimates -64..-1
+        est = (wIdx >= 64) ? wIdx - 128 : wIdx;
+        if      (est >= min1000) QSel4[dIdx*128 + wIdx] = 4'b1000;
+        else if (est >= min0100) QSel4[dIdx*128 + wIdx] = 4'b0100;
+        else if (est >= min0000) QSel4[dIdx*128 + wIdx] = 4'b0000;
+        else if (est >= min0010) QSel4[dIdx*128 + wIdx] = 4'b0010;
+        else                     QSel4[dIdx*128 + wIdx] = 4'b0001;
+      end
+    end
+  end
+endmodule
+
 ///////////////////
 // Preprocessing //
 ///////////////////
 module srtpreproc (
   input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic [`NF-1:0] XFrac, YFrac,
-  input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
+  input  logic [`NF:0] XManE, YManE,
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN-1:0] X, D,
+  output logic [`DIVLEN:0] X,
+  output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
   output logic       intSign // Quotient integer sign
 );
 
-  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
-  logic  [`XLEN-1:0] PosA, PosB;
-  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  // logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
+  // logic  [`XLEN-1:0] PosA, PosB;
+  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  logic  [`DIVLEN:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
 
-  assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
-  assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
+  // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
+  // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
 
-  lzc #(`XLEN) lzcA (PosA, zeroCntA);
-  lzc #(`XLEN) lzcB (PosB, zeroCntB);
+  // lzc #(`XLEN) lzcA (PosA, zeroCntA);
+  // lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
-  assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
-  assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
+  // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
+  // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
-  assign PreprocA = ExtraA << zeroCntA;
-  assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
+  // assign PreprocA = ExtraA << zeroCntA;
+  // assign PreprocB = ExtraB << (zeroCntB + 1);
+  assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
 
   
   assign X = Int ? PreprocA : PreprocX;
-  assign D = Int ? PreprocB : PreprocY;
-  assign intExp = zeroCntB - zeroCntA + 1;
-  assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
+  assign Dpreproc = Int ? PreprocB : PreprocY;
+  // assign intExp = zeroCntB - zeroCntA + 1;
+  // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 endmodule
 
-/////////////////////////////////
-// Quotient Selection, Radix 2 //
-/////////////////////////////////
-module qsel2 ( // *** eventually just change to 4 bits
-  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
-  output logic         qp, qz, qm
-);
- 
-  logic [`DIVLEN+3:`DIVLEN]  p, g;
-  logic          magnitude, sign, cout;
-
-  // The quotient selection logic is presented for simplicity, not
-  // for efficiency.  You can probably optimize your logic to
-  // select the proper divisor with less delay.
-
-  // Quotient equations from EE371 lecture notes 13-20
-  assign p = ps ^ pc;
-  assign g = ps & pc;
-
-  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
-  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
-  assign #1 sign = p[`DIVLEN+3] ^ cout;
-/*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
-			  (ps[52]^pc[52]));
-  assign #1 sign = (ps[55]^pc[55])^
-      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
-			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
-						(ps[52]&pc[52]))))); */
-
-  // Produce quotient = +1, 0, or -1
-  assign #1 qp = magnitude & ~sign;
-  assign #1 qz = ~magnitude;
-  assign #1 qm = magnitude & sign;
-endmodule
-
-
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
-module otfc4 #(parameter N=65) (
+module otfc4 (
   input  logic         clk,
-  input  logic         Start,
+  input  logic         DivStart,
   input  logic [3:0]   q,
-  output logic [N-1:0] r
+  output logic [`DIVLEN+2:0] Quot
 );
 
   //  The on-the-fly converter transfers the quotient 
@@ -224,20 +279,20 @@ module otfc4 #(parameter N=65) (
   //
   //  This code follows the psuedocode presented in the 
   //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-2 division.
+  //  it is written for Radix-4 division.
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
+  logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
-  logic [N:0] QR, QMR;
+  logic [`DIVLEN:0] QR, QMR;
   // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
-  mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
-  flop #(N+3) Qreg(clk, QMux, Q);
-  flop #(N+3) QMreg(clk, QMMux, QM);
+  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
+  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
+  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   // shift Q (quotent) and QM (quotent-1)
 		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
@@ -247,11 +302,9 @@ module otfc4 #(parameter N=65) (
 		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
     // *** how does the 0 concatination numbers work?
 
-
-
   always_comb begin
-    QR  = Q[N:0];
-    QMR = QM[N:0];     // Shift Q and QM
+    QR  = Quot[`DIVLEN:0];
+    QMR = QM[`DIVLEN:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
@@ -269,7 +322,8 @@ module otfc4 #(parameter N=65) (
       QMNext = {QMR, 2'b11};
     end 
   end
-  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+  // Quot is in the range [.5, 2) so normalize the result if necessary
+  // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
 
 endmodule
 
@@ -287,7 +341,7 @@ module csa #(parameter N=69) (
   // This block adds in1, in2, in3, and cin to produce 
   // a result out1 / out2 in carry-save redundant form.
   // cin is just added to the least significant bit and
-  // is required to handle adding a negative divisor.
+  // is required to handle adding a negative divisor.
   // Fortunately, the carry (out2) is shifted left by one
   // bit, leaving room in the least significant bit to 
   // insert cin.
@@ -302,22 +356,11 @@ endmodule
 // expcalc  //
 //////////////
 module expcalc(
-  input logic  [`NE-1:0] XExp, YExp,
-  output logic [`NE-1:0] calcExp
+  input logic  [`NE-1:0] XExpE, YExpE,
+  input logic XZeroE,
+  output logic [`NE:0] DivCalcExp
 );
 
-  assign calcExp = XExp - YExp + (`NE)'(`BIAS);
+  assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
 
 endmodule
-
-//////////////
-// signcalc //
-//////////////
-module signcalc(
-  input logic  XSign, YSign,
-  output logic calcSign
-);
-
-  assign calcSign = XSign ^ YSign;
-
-endmodule
\ No newline at end of file
diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv
index 6ac616ed6..434ef74b0 100644
--- a/pipelined/srt/testbench-radix4.sv
+++ b/pipelined/srt/testbench-radix4.sv
@@ -2,30 +2,6 @@
 `include "wally-config.vh"
 `define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
 
-/////////////
-// counter //
-/////////////
-module counter(input  logic clk, 
-               input  logic req, 
-               output logic done);
- 
-   logic    [5:0]  count;
-
-  // This block of control logic sequences the divider
-  // through its iterations.  You may modify it if you
-  // build a divider which completes in fewer iterations.
-  // You are not responsible for the (trivial) circuit
-  // design of the block.
-
-  always @(posedge clk)
-    begin
-      if      (count == `DIVLEN/2+1) done <= #1 1;
-      else if (done | req) done <= #1 0;	
-      if (req) count <= #1 0;
-      else     count <= #1 count+1;
-    end
-endmodule
-
 ///////////
 // clock //
 ///////////
@@ -43,7 +19,7 @@ endmodule
 module testbenchradix4;
   logic              clk;
   logic              req;
-  logic              done;
+  logic              DivDone;
   logic [63:0]       a, b;
   logic [51:0]       afrac, bfrac;
   logic [10:0]       aExp, bExp;
@@ -65,22 +41,20 @@ module testbenchradix4;
   logic [MEM_WIDTH-1:0] Vec;  // Verilog doesn't allow direct access to a
                             // bit field of an array 
   logic [63:0] correctr, nextr, diffn, diffp;
-  logic [10:0] rExp;
-  logic        rsign;
+  logic [10:0] DivExp;
+  logic        DivSgn;
   integer testnum, errors;
 
   // Divider
-  srtradix4 srtradix4(.clk, .Start(req), 
-                .Stall(1'b0), .Flush(1'b0), 
-                .XExp(aExp), .YExp(bExp), .rExp,
-                .XSign(asign), .YSign(bsign), .rsign,
+  srtradix4 srtradix4(.clk, .DivStart(req), 
+                .XExpE(aExp), .YExpE(bExp), .DivExp,
+                .XSgnE(asign), .YSgnE(bsign), .DivSgn,
                 .XFrac(afrac), .YFrac(bfrac), 
-                .SrcA('0), .SrcB('0), .Fmt(2'b00), 
-                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), 
-                .Quot, .Rem(), .Flags());
+                .SrcA('0), .SrcB('0),
+                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
+                .Quot, .Rem());
 
   // Counter
-  counter counter(clk, req, done);
 
 
     initial
@@ -112,14 +86,14 @@ module testbenchradix4;
   always @(posedge clk)
     begin
       r = Quot[`DIVLEN-1:`DIVLEN - 52];
-      if (done) begin
+      if (DivDone) begin
         req <= 1;
         diffp = correctr[51:0] - r;
         diffn = r - correctr[51:0];
-        if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+        if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
           begin
             errors = errors+1;
-            $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
+            $display("result was %h_%h, should be %h %h %h\n", DivExp, r, correctr, diffn, diffp);
             $display("failed\n");
             $stop;
           end
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 4bae7d106..e8afb299b 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -48,13 +48,14 @@ module testbenchfp;
   logic                 XInf, YInf, ZInf;                   // is the input infinity
   logic                 XZero, YZero, ZZero;                // is the input zero
   logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
-  logic  [`LGLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
+  logic  [`CVTLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
   logic        IntZeroE;
   logic CvtResSgnE;
-  logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
-	logic [`LOGLGLEN-1:0] CvtShiftAmtE;  // how much to shift by
+	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
+	logic [`DIVLEN+2:0] Quot;
   logic CvtResDenormUfE;
+  logic DivStart, DivDone;
   
 
   // in-between FMA signals
@@ -68,6 +69,8 @@ module testbenchfp;
   logic 			          NegSumE;
   logic 			          ZSgnEffE;
   logic 			          PSgnE;
+  logic       DivSgn;
+  logic [`NE:0] DivCalcExp;
 
 
   ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -205,16 +208,16 @@ module testbenchfp;
             Fmt = {Fmt, 2'b11};
           end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the divide tests/op-ctrls/unit/fmt
-      //   Tests = {Tests, f128div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //     for(int i = 0; i<5; i++) begin
-      //       Unit = {Unit, `DIVUNIT};
-      //       Fmt = {Fmt, 2'b11};
-      //     end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the divide tests/op-ctrls/unit/fmt
+        Tests = {Tests, f128div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'b11};
+          end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tested
       //   // add the square-root tests/op-ctrls/unit/fmt
       //   Tests = {Tests, f128sqrt};
@@ -332,16 +335,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b01};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f64div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b01};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the correct tests/op-ctrls/unit/fmt to their lists
+        Tests = {Tests, f64div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b01};
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tessted
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f64sqrt};
@@ -443,16 +446,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b00};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f32div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b00};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the correct tests/op-ctrls/unit/fmt to their lists
+        Tests = {Tests, f32div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b00};
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f32sqrt};
@@ -536,16 +539,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b10};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f16div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b10};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the correct tests/op-ctrls/unit/fmt to their lists
+        Tests = {Tests, f16div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b10};
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f16sqrt};
@@ -611,7 +614,7 @@ module testbenchfp;
   readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                     .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                     .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
-                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
+                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
                                     .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                     .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
                                     .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
@@ -639,8 +642,8 @@ module testbenchfp;
               .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
               .ProdExpE, .AddendStickyE, .KillProdE); 
               
-  postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
+  postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
               .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
               .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
               .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
@@ -650,21 +653,16 @@ module testbenchfp;
               .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
               .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
   
-fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
+  fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
             .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
             .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), 
-  //                 .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
-  //                 .CvtRes, .CvtFlgE);
-  // *** integrade divide and squareroot
-  //  fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), 
-  //        .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
-  //        .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
-  //        .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg));
-
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
+                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
+                .DivDone, .Quot, .Rem());
+                
   assign CmpFlg[3:0] = 0;
 
   // produce clock
@@ -817,7 +815,7 @@ end
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
     // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone|(UnitVal !== `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin // divide results are only compared once DivDone is set; all other units are compared every time
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@@ -840,8 +838,7 @@ end
       $stop;
     end
 
-
-    VectorNum += 1; // increment the vector
+    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
 
     if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
 
@@ -895,15 +892,17 @@ module readvectors (
   output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
   output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
   output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XExpMaxE,
+  output logic                    XExpMaxE,
+  output logic                    DivStart,
   output logic [`FLEN-1:0] X, Y, Z
 );
 
   // apply test vectors on rising edge of clk
   // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(posedge clk) begin
+  always @(VectorNum) begin
     #1; 
     AnsFlg = TestVector[4:0];
+    DivStart = 1'b0;
     case (Unit)
       `FMAUNIT:
         case (Fmt)
@@ -972,21 +971,33 @@ module readvectors (
             X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
             Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
             Ans = TestVector[8+(`Q_LEN-1):8];
+            if (~clk) #5;
+            DivStart = 1'b1; #10 // one clk cycle
+            DivStart = 1'b0;
           end
           2'b01:	begin	  // double
             X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
             Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
             Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
+            if (~clk) #5;
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
           2'b00:	begin	  // single
             X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
             Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
             Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
+            if (~clk) #5;
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
           2'b10:	begin	  // half
             X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
             Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
             Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
+            if (~clk) #5;
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
         endcase
       `CMPUNIT:
diff --git a/pipelined/testbench/testbench.sv.bak b/pipelined/testbench/testbench.sv.bak
deleted file mode 100644
index 8fdde9326..000000000
--- a/pipelined/testbench/testbench.sv.bak
+++ /dev/null
@@ -1,473 +0,0 @@
-///////////////////////////////////////////
-// testbench.sv
-//
-// Written: David_Harris@hmc.edu 9 January 2021
-// Modified: 
-//
-// Purpose: Wally Testbench and helper modules
-//          Applies test programs from the riscv-arch-test and Imperas suites
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-`include "tests.vh"
-
-module testbench;
-  parameter TESTSPERIPH = 0; // set to 0 for regression
-  parameter TESTSPRIV = 0; // set to 0 for regression
-  parameter DEBUG=0;
-  parameter TEST="none";
- 
-  logic        clk;
-  logic        reset_ext, reset;
-
-  parameter SIGNATURESIZE = 5000000;
-
-  int test, i, errors, totalerrors;
-  logic [31:0] sig32[0:SIGNATURESIZE];
-  logic [`XLEN-1:0] signature[0:SIGNATURESIZE];
-  logic [`XLEN-1:0] testadr;
-  string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
-  logic [31:0] InstrW;
-
-string tests[];
-logic [3:0] dummy;
-
-  string ProgramAddrMapFile, ProgramLabelMapFile;
-  logic [`AHBW-1:0] HRDATAEXT;
-  logic             HREADYEXT, HRESPEXT;
-  logic [31:0]      HADDR;
-  logic [`AHBW-1:0] HWDATA;
-  logic             HWRITE;
-  logic [2:0]       HSIZE;
-  logic [2:0]       HBURST;
-  logic [3:0]       HPROT;
-  logic [1:0]       HTRANS;
-  logic             HMASTLOCK;
-  logic             HCLK, HRESETn;
-  logic [`XLEN-1:0] PCW;
-
-  logic 	    DCacheFlushDone, DCacheFlushStart;
-    
-  flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
-  flopenr  #(32)   InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW,  dut.core.ifu.InstrM, InstrW);
-
-  // check assertions for a legal configuration
-  riscvassertions riscvassertions();
-
-  // pick tests based on modes supported
-  initial begin
-    $display("TEST is %s", TEST);
-    //tests = '{};
-    if (`XLEN == 64) begin // RV64
-      case (TEST)
-        "arch64i":                        tests = arch64i;
-        "arch64priv":                     tests = arch64priv;
-        "arch64c":      if (`C_SUPPORTED) 
-                          if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
-                          else                  tests = {arch64c};
-        "arch64m":      if (`M_SUPPORTED) tests = arch64m;
-        "arch64d":      if (`D_SUPPORTED) tests = arch64d;
-        "imperas64i":                     tests = imperas64i;
-        "imperas64p":                     tests = imperas64p;
-//        "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
-        "imperas64f":   if (`F_SUPPORTED) tests = imperas64f;
-        "imperas64d":   if (`D_SUPPORTED) tests = imperas64d;
-        "imperas64m":   if (`M_SUPPORTED) tests = imperas64m;
-        "imperas64a":   if (`A_SUPPORTED) tests = imperas64a;
-        "imperas64c":   if (`C_SUPPORTED) tests = imperas64c;
-                        else              tests = imperas64iNOc;
-        "testsBP64":                      tests = testsBP64;
-        "wally64i":                       tests = wally64i; // *** redo
-        "wally64priv":                    tests = wally64priv;// *** redo
-        "imperas64periph":                tests = imperas64periph;
-        "coremark":                       tests = coremark;
-      endcase 
-    end else begin // RV32
-      case (TEST)
-        "arch32i":                        tests = arch32i;
-        "arch32priv":                     tests = arch32priv;
-        "arch32c":      if (`C_SUPPORTED) 
-                          if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv};
-                          else                  tests = {arch32c};
-        "arch32m":      if (`M_SUPPORTED) tests = arch32m;
-        "arch32f":      if (`F_SUPPORTED) tests = arch32f;
-        "imperas32i":                     tests = imperas32i;
-        "imperas32p":                     tests = imperas32p;
-//        "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
-        "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
-        "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
-        "imperas32a":   if (`A_SUPPORTED) tests = imperas32a;
-        "imperas32c":   if (`C_SUPPORTED) tests = imperas32c;
-                        else              tests = imperas32iNOc;
-        "wally32i":                       tests = wally32i; // *** redo
-        "wally32e":                       tests = wally32e; 
-        "wally32priv":                    tests = wally32priv; // *** redo
-        "imperas32periph":                  tests = imperas32periph;
-      endcase
-    end
-    if (tests.size() == 0) begin
-      $display("TEST %s not supported in this configuration", TEST);
-      $stop;
-    end
-  end
-
-  string signame, memfilename, pathname;
-
-  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
-  logic UARTSin, UARTSout;
-
-  logic SDCCLK;
-  logic      SDCCmdIn;
-  logic      SDCCmdOut;
-  logic      SDCCmdOE;
-  logic [3:0] SDCDatIn;
-
-  logic             HREADY;
-  logic 	    HSELEXT;
-  
-
-  // instantiate device to be tested
-  assign GPIOPinsIn = 0;
-  assign UARTSin = 1;
-  assign HREADYEXT = 1;
-  assign HRESPEXT = 0;
-  assign HRDATAEXT = 0;
-
-  wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
-                        .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
-                        .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
-                        .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); 
-
-  // Track names of instructions
-  instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
-                dut.core.ifu.FinalInstrRawF[31:0],
-                dut.core.ifu.InstrD, dut.core.ifu.InstrE,
-                dut.core.ifu.InstrM,  InstrW,
-                InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
-
-  // initialize tests
-  localparam integer 	   MemStartAddr = `RAM_BASE>>(1+`XLEN/32);
-  localparam integer 	   MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32);
-
-  initial
-    begin
-      test = 1;
-      totalerrors = 0;
-      testadr = 0;
-      // fill memory with defined values to reduce Xs in simulation
-      // Quick note the memory will need to be initialized.  The C library does not
-      //  guarantee the  initialized reads.  For example a strcmp can read 6 byte
-      //  strings, but uses a load double to read them in.  If the last 2 bytes are
-      //  not initialized the compare results in an 'x' which propagates through 
-      // the design.
-      if (TEST == "coremark") 
-        for (i=MemStartAddr; i<MemEndAddr; i = i+1) 
-          dut.uncore.ram.ram.RAM[i] = 64'h0; 
-
-      // read test vectors into memory
-      pathname = tvpaths[tests[0].atoi()];
-/*      if (tests[0] == `IMPERASTEST)
-        pathname = tvpaths[0];
-      else pathname = tvpaths[1]; */
-      memfilename = {pathname, tests[test], ".elf.memfile"};
-      if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
-      else              $readmemh(memfilename, dut.uncore.ram.RAM);
-      if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
-
-      ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
-      ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
-      $display("Read memfile %s", memfilename);
-      reset_ext = 1; # 42; reset_ext = 0;
-    end
-
-  // generate clock to sequence tests
-  always
-    begin
-      clk = 1; # 5; clk = 0; # 5;
-      // if ($time % 100000 == 0) $display("Time is %0t", $time);
-    end
-   
-  // check results
-  always @(negedge clk)
-    begin    
-      if (TEST == "coremark")
-        if (dut.core.priv.priv.ecallM) begin
-          $display("Benchmark: coremark is done.");
-          $stop;
-        end
-      if (DCacheFlushDone) begin
- 
-        #600; // give time for instructions in pipeline to finish
-        // clear signature to prevent contamination from previous tests
-        for(i=0; i<SIGNATURESIZE; i=i+1) begin
-          sig32[i] = 'bx;
-        end
-
-        // read signature, reformat in 64 bits if necessary
-        signame = {pathname, tests[test], ".signature.output"};
-        $readmemh(signame, sig32);
-        i = 0;
-        while (i < SIGNATURESIZE) begin
-          if (`XLEN == 32) begin
-            signature[i] = sig32[i];
-            i = i+1;
-          end else begin
-            signature[i/2] = {sig32[i+1], sig32[i]};
-            i = i + 2;
-          end
-          if (i >= 4 & sig32[i-4] === 'bx) begin
-            if (i == 4) begin
-              i = SIGNATURESIZE+1; // flag empty file
-              $display("  Error: empty test file");
-            end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
-          end
-        end
-
-        // Check errors
-        errors = (i == SIGNATURESIZE+1); // error if file is empty
-        i = 0;
-        testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
-        /* verilator lint_off INFINITELOOP */
-        while (signature[i] !== 'bx) begin
-          logic [`XLEN-1:0] sig;
-          if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.ram.RAM[testadr+i];
-          else                   sig = dut.uncore.ram.RAM[testadr+i];
-//          $display("signature[%h] = %h sig = %h", i, signature[i], sig);
-          if (signature[i] !== sig &
-          //if (signature[i] !== dut.core.lsu.dtim.ram.RAM[testadr+i] &
-	      (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin  // ***i+1?
-            if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
-//            if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
-              // report errors unless they are garbage at the end of the sim
-              // kind of hacky test for garbage right now
-              $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
-              errors = errors+1;
-              $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", 
-                    tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
-                    //   tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.RAM[testadr+i], signature[i]);
-              $stop;//***debug
-            end
-          end
-          i = i + 1;
-        end
-        /* verilator lint_on INFINITELOOP */
-        if (errors == 0) begin
-          $display("%s succeeded.  Brilliant!!!", tests[test]);
-        end
-        else begin
-          $display("%s failed with %d errors. :(", tests[test], errors);
-          totalerrors = totalerrors+1;
-        end
-        test = test + 2;
-        if (test == tests.size()) begin
-          if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
-          else $display("FAIL: %d test programs had errors", totalerrors);
-          $stop;
-        end
-        else begin
-            //pathname = tvpaths[tests[0]];
-            memfilename = {pathname, tests[test], ".elf.memfile"};
-            //$readmemh(memfilename, dut.uncore.ram.ram.RAM);
-            if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
-            else                   $readmemh(memfilename, dut.uncore.ram.RAM);
-            if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
-
-            ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
-            ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
-            $display("Read memfile %s", memfilename);
-            reset_ext = 1; # 47; //reset_ext = 0;
-        end
-      end
-    end // always @ (negedge clk)
-
-  // track the current function or global label
-  if (DEBUG == 1) begin : FunctionName
-    FunctionName FunctionName(.reset(reset),
-			      .clk(clk),
-			      .ProgramAddrMapFile(ProgramAddrMapFile),
-			      .ProgramLabelMapFile(ProgramLabelMapFile));
-  end
-
-  // Termination condition
-  // terminate on a specific ECALL after li x3,1 for old Imperas tests,  *** remove this when old imperas tests are removed
-  // or sw	gp,-56(t0) for new Imperas tests
-  // or sd gp, -56(t0) 
-  // or on a jump to self infinite loop (6f) for RISC-V Arch tests
-  logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
-  if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
-  else                  assign ecf = 0;
-  assign DCacheFlushStart = ecf & 
-			    (dut.core.ieu.dp.regf.rf[3] == 1 | 
-			     (dut.core.ieu.dp.regf.we3 & 
-			      dut.core.ieu.dp.regf.a3 == 3 & 
-			      dut.core.ieu.dp.regf.wd3 == 1)) |
-          (dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;
-
-  DCacheFlushFSM DCacheFlushFSM(.clk(clk),
-    			.reset(reset),
-	    		.start(DCacheFlushStart),
-		    	.done(DCacheFlushDone));
-
-  // initialize the branch predictor
-  if (`BPRED_ENABLED == 1) 
-    initial begin
-      $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
-      $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);    
-    end 
-endmodule
-
-module riscvassertions;
-  initial begin
-    assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
-    assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
-    assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
-    assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
-    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
-    assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
-    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
-    assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
-    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
-    assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
-    assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
-    assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
-    assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
-    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
-    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
-    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");
-    assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");
-	  assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
-    assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
-    assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
-//    assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
-    assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
-    assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
-  end
-endmodule
-
-
-/* verilator lint_on STMTDLY */
-/* verilator lint_on WIDTH */
-
-module DCacheFlushFSM
-  (input logic clk,
-   input logic reset,
-   input logic start,
-   output logic done);
-
-  genvar adr;
-
-  logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)];
-  
-	if(`DMEM == `MEM_CACHE) begin
-	  localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
-	  localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
-	  localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
-	  localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;  
-	  localparam integer lognumlines = $clog2(numlines);
-	  localparam integer loglinebytelen = $clog2(linebytelen);
-	  localparam integer lognumways = $clog2(numways);
-	  localparam integer tagstart = lognumlines + loglinebytelen;
-
-
-
-	  genvar 			 index, way, cacheWord;
-	  logic [`XLEN-1:0]  CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic [`XLEN-1:0]  CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic 			 CacheValid  [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic 			 CacheDirty  [numways-1:0] [numlines-1:0] [numwords-1:0];
-	  logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
-      for(index = 0; index < numlines; index++) begin
-		for(way = 0; way < numways; way++) begin
-		  for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
-			copyShadow #(.tagstart(tagstart),
-						 .loglinebytelen(loglinebytelen))
-			copyShadow(.clk,
-					   .start,
-					   .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
-					   .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
-					   .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
-					   .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
-					   .index(index),
-					   .cacheWord(cacheWord),
-					   .CacheData(CacheData[way][index][cacheWord]),
-					   .CacheAdr(CacheAdr[way][index][cacheWord]),
-					   .CacheTag(CacheTag[way][index][cacheWord]),
-					   .CacheValid(CacheValid[way][index][cacheWord]),
-					   .CacheDirty(CacheDirty[way][index][cacheWord]));
-		  end
-		end
-      end
-
-	  integer i, j, k;
-
-	  always @(posedge clk) begin
-		if (start) begin #1
-		  #1
-			for(i = 0; i < numlines; i++) begin
-			  for(j = 0; j < numways; j++) begin
-				for(k = 0; k < numwords; k++) begin
-				  if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
-					ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
-				  end
-				end	
-			  end
-			end
-		end
-	  end
-
-	  
-	end
-  flop #(1) doneReg(.clk, .d(start), .q(done));
-endmodule
-
-module copyShadow
-  #(parameter tagstart, loglinebytelen)
-  (input logic clk,
-   input logic 			     start,
-   input logic [`PA_BITS-1:tagstart] tag,
-   input logic 			     valid, dirty,
-   input logic [`XLEN-1:0] 	     data,
-   input logic [32-1:0] 	     index,
-   input logic [32-1:0] 	     cacheWord,
-   output logic [`XLEN-1:0] 	     CacheData,
-   output logic [`PA_BITS-1:0] 	     CacheAdr,
-   output logic [`XLEN-1:0] 	     CacheTag,
-   output logic 		     CacheValid,
-   output logic 		     CacheDirty);
-  
-
-  always_ff @(posedge clk) begin
-    if(start) begin
-      CacheTag = tag;
-      CacheValid = valid;
-      CacheDirty = dirty;
-      CacheData = data;
-      CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
-    end
-  end
-  
-endmodule		      
-
diff --git a/synthDC/Makefile b/synthDC/Makefile
index 193153cac..3de666659 100755
--- a/synthDC/Makefile
+++ b/synthDC/Makefile
@@ -15,6 +15,7 @@ export MAXCORES ?= 4
 # MAXOPT turns on flattening, boundary optimization, and retiming
 # The output netlist is hard to interpret, but significantly better PPA
 export MAXOPT ?= 0
+export DRIVE ?= FLOP
 
 time := $(shell date +%F-%H-%M)
 hash := $(shell git rev-parse --short HEAD)
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
index 278e0aa70..3cbf56ae5 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output
@@ -1,7 +1,23 @@
+00000000 # test reset to zero
 00000000
-00000000
-A5A5A5A5
+A5A5A5A5 # test output pins
 5A5AFFFF
-00000000
+00000000 # test input enables
 5A5A0000
-A55A0000
+A55A0000 # test XOR
+A55A0000 # Test interrupt pending bits: high_ip
+5AA5FFFF #   low_ip
+00000000 #   rise_ip
+00000000 #   fall_ip
+A4AA0000 #   input_val
+A5FA0000 #   high_ip
+5BF5FFFF #   low_ip
+00A00000 #   rise_ip
+01500000 #   fall_ip
+00000000 #   MEIP
+00000000 # Test interrupts can be enabled without being triggered: MIP = 0
+00000000 #   MIP = 0
+00000000 #   MIP = 0
+00000000 #   MIP = 0
+00000800 #  Test interrupts can be enabled and triggered: MEIP set
+00000000 #   MEIP = 0
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
index a72ae385a..0caad5d0b 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
@@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
     addi a6, a6, 4 
 .endm
 
+// Place this macro inside a test_cases table to set up the PLIC for external interrupts; it expands to .4byte (address, value, write32_test) entries
+.macro SETUP_PLIC  
+    # Set up the PLIC with a series of register-write test entries (the .equ addresses below are also usable by the including test)
+
+    .equ PLIC_INTPRI_GPIO, 0x0C00000C       # GPIO is interrupt 3
+    .equ PLIC_INTPRI_UART, 0x0C000028       # UART is interrupt 10
+    .equ PLIC_INTPENDING0, 0x0C001000       # intPending0 register
+    .equ PLIC_INTEN00,     0x0C002000       # interrupt enables for context 0 (machine mode) sources 31:1
+    .equ PLIC_INTEN10,     0x0C002080       # interrupt enables for context 1 (supervisor mode) sources 31:1
+    .equ PLIC_THRESH0,     0x0C200000       # Priority threshold for context 0 (machine mode)
+    .equ PLIC_CLAIM0,      0x0C200004       # Claim/Complete register for context 0
+    .equ PLIC_THRESH1,     0x0C201000       # Priority threshold for context 1 (supervisor mode)
+    .equ PLIC_CLAIM1,      0x0C201004       # Claim/Complete register for context 1
+
+    .4byte PLIC_THRESH0, 0, write32_test    # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
+    .4byte PLIC_THRESH1, 7, write32_test    # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
+    .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
+    .4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
+    .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
+    .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
+.endm
+
 .macro END_TESTS
     // invokes one final ecall to return to machine mode then terminates this program, so the output is
     //      0x8: termination called from U mode
@@ -937,6 +959,20 @@ read08_test:
     addi a6, a6, 4
     j test_loop // go to next test case
 
+readmip_test:  // read the machine interrupt-pending CSR (mip) into the signature
+    csrr t2, mip
+    sw t2, 0(t1) // store the CSR value at the address held in t1
+    addi t1, t1, 4 // advance t1 past the 4-byte word just written
+    addi a6, a6, 4 // NOTE(review): a6 is bumped by 4 in step with t1, as read08_test does; its role is defined outside this hunk
+    j test_loop // go to next test case
+
+readsip_test:  // read the supervisor interrupt-pending CSR (sip) into the signature
+    csrr t2, sip
+    sw t2, 0(t1) // store the CSR value at the address held in t1
+    addi t1, t1, 4 // advance t1 past the 4-byte word just written
+    addi a6, a6, 4 // NOTE(review): a6 is bumped by 4 in step with t1, as read08_test does; its role is defined outside this hunk
+    j test_loop // go to next test case
+
 goto_s_mode:
     // return to address in t3, 
     li a0, 3 // Trap handler behavior (go to supervisor mode)
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
index e4792a78c..be40c0e26 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
@@ -72,6 +72,7 @@ test_cases:
 
 .4byte input_val, 0x00000000, read32_test  # input_val reset to zero
 .4byte input_en, 0x00000000, read32_test  # input_en reset to zero
+# *** add more
 
 # =========== Test output and input pins ===========
 
@@ -86,14 +87,49 @@ test_cases:
 .4byte input_en, 0x00000000, write32_test       # disable all input pins
 .4byte input_val, 0x00000000, read32_test       # read 0 since input pins are disabled
 .4byte input_en, 0xFFFF0000, write32_test       # enable a few input pins
-.4byte input_val, 0x5A5A0000, read32_test      # read part of pattern set above.
+.4byte input_val, 0x5A5A0000, read32_test       # read part of pattern set above.
 
-# =========== Test output enables(?) ===========
-
-.4byte output_en, 0xFFFFFFFF, write32_test      # undo changes made to output enable
 
 # =========== Test XOR functionality ===========
 .4byte out_xor, 0xFF00FF00, write32_test        # invert certain pin values
-.4byte input_val, 0xA55A0000, read32_test           # read inverted pins and verify input enable is working
+.4byte input_val, 0xA55A0000, read32_test       # read inverted pins and verify input enable is working
+
+# =========== Test Interrupt Pending bits ===========
+
+SETUP_PLIC
+
+.4byte low_ip, 0xFFFFFFFF, write32_test             # clear pending low interrupts
+.4byte high_ip, 0xFFFFFFFF, write32_test            # clear pending high interrupts
+.4byte rise_ip, 0xFFFFFFFF, write32_test            # clear pending rise interrupts
+.4byte fall_ip, 0xFFFFFFFF, write32_test            # clear pending fall interrupts
+.4byte high_ip, 0xA55A0000, read32_test             # check pending high interrupts
+.4byte low_ip, 0x5AA5FFFF, read32_test              # check pending low interrupts
+.4byte rise_ip, 0x00000000, read32_test             # check pending rise interrupts
+.4byte fall_ip, 0x00000000, read32_test             # check pending fall interrupts
+.4byte output_val, 0x5BAA000F, write32_test         # change output pattern to check rise/fall interrupts
+.4byte input_val, 0xA4AA0000, read32_test           # check new output matches expected output
+.4byte high_ip, 0xA5FA0000, read32_test             # high_ip after the change: earlier 0xA55A0000 OR new input 0xA4AA0000, as in the reference output
+.4byte low_ip, 0x5BF5FFFF, read32_test              # low interrupt pending should be opposite high for enabled pins
+.4byte rise_ip, 0x00A00000, read32_test             # check for changed bits (rising)
+.4byte fall_ip, 0x01500000, read32_test             # check for changed bits (falling)
+.4byte 0x0, 0x00000000, readmip_test                # Check no external interrupt has been generated
+
+# =========== Test interrupts can be enabled without being triggered ===========
+
+.4byte high_ie, 0x00010000, write32_test            # enable high interrupt on bit 16, no pending interrupt
+.4byte 0x0, 0x00000000, readmip_test                # No external interrupt should be pending
+.4byte low_ie, 0x00020000, write32_test             # enable low interrupt on bit 17, no pending interrupt
+.4byte 0x0, 0x00000000, readmip_test                # No external interrupt should be pending
+.4byte rise_ie, 0x00010000, write32_test            # enable rise interrupt on bit 16, no pending interrupt
+.4byte 0x0, 0x00000000, readmip_test                # No external interrupt should be pending
+.4byte fall_ie, 0x00010000, write32_test            # enable fall interrupt on bit 16, no pending interrupt
+.4byte 0x0, 0x00000000, readmip_test                # No external interrupt should be pending
+
+# =========== Test interrupts can be enabled and triggered ===========
+
+.4byte high_ie, 0x00020000, write32_test            # enable high interrupt on bit 17, which is pending
+.4byte 0x0, 0x00000800, readmip_test                # MEIP should be raised
+.4byte high_ie, 0x00000000, write32_test             # disable high interrupt on bit 17, which is pending
+.4byte 0x0, 0x00000000, readmip_test                # MEIP should be released
 
 .4byte 0x0, 0x0, terminate_test # terminate tests
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output
index 7b23883c6..fd88590e3 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output
@@ -254,12 +254,12 @@ FFFFEE00
 FFFFEE00
 00000000
 00000000
-02BEEF10
+02BEEF10 # Something here is failing
 0000000B
 80000000
 00000003
 000000FF
-FFFFFFFF
+00000000
 000000FF
 00000000
 00000000
@@ -270,20 +270,20 @@ FFFFFFFF
 FFFFFF00
 00000000
 00000000
-02BEEF11
+02BEEF11 # this might be wrong
 0000000B
 80000000
-00000003
-000000CC
-CCCCCCCC
-00000000
-00000000
-00000033
-00000000
-000000FF
-000000CC
-FFFFFF33
-FFFFFF33
+00000003 
+00000033 # input
+00000000 # output
+00000000 # rise ip
+00000000 # serviced rise ip
+000000CC # fall ip
+00000000 
+000000FF # high ip
+00000033 # why is this 0x33?
+FFFFFFCC # low ip
+FFFFFFCC # serviced low ip
 00000000
 00000000
 03BEEF12
@@ -454,9 +454,9 @@ FFFFFF33
 00080000
 00080000
 00000000
+00000000 # is it this one that's failing?
 00000000
-00000000
-00080000
+00080000 # failing
 00080000
 FFFFFFFF
 FFF7FFFF
@@ -478,7 +478,7 @@ FFFFFFFF
 FFFFFFFE
 00000000
 00000000
-04BEEF1E
+04BEEF1E # this might also be wrong
 00000009
 80000000
 0000000A
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S
index c44d7a681..705875146 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S
@@ -271,7 +271,7 @@ main_code: #####
     sw t1, 0x04(t0)
     # raise all output_en
     sw t1, 0x08(t0)
-    # raise all input_en
+    # raise all rise_en
     sw t1, 0x18(t0)
     # ========== Execute Test ==========
     # set MEIE
@@ -616,6 +616,9 @@ Intr02BEEF11:
     sw t1, 0x08(t0)
     # set initial output state
     sw x0, 0x0C(t0)
+    # clear XOR
+    li t1, 0x00000000
+    sw t1, 0x40(t0)
     # clear all pending interrupts
     li t1, 0xFFFFFFFF
     sw t1, 0x1C(t0)
@@ -843,7 +846,7 @@ Intr03BEEF1A:
     sw t1, 0x04(t0)
     # raise all output_en
     sw t1, 0x08(t0)
-    # raise all input_en
+    # raise all rise_en
     sw t1, 0x18(t0)
     # ========== Execute Test ==========
     # set MEIE and SEIE