From b728e5054d2fb581321985668efabfca439b61b6 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 11 Jul 2022 18:30:21 -0700
Subject: [PATCH] variable iterations implemented in radix-4 divider

---
 pipelined/config/rv64fp/wally-config.vh |   4 +-
 pipelined/config/shared/wally-shared.vh |  12 +-
 pipelined/regression/wave-fpu.do        |  10 +-
 pipelined/src/fpu/divshiftcalc.sv       |   9 +-
 pipelined/src/fpu/divsqrt.sv            |  10 +-
 pipelined/src/fpu/flags.sv              |  38 ++--
 pipelined/src/fpu/fmashiftcalc.sv       |  24 +-
 pipelined/src/fpu/fpu.sv                |  10 +-
 pipelined/src/fpu/lzacorrection.sv      |   4 +-
 pipelined/src/fpu/postprocess.sv        |  26 +--
 pipelined/src/fpu/resultselect.sv       | 290 ------------------------
 pipelined/src/fpu/resultsign.sv         |   4 +-
 pipelined/src/fpu/round.sv              |   6 +-
 pipelined/src/fpu/srt-radix4.sv         | 140 ++++++++----
 pipelined/src/fpu/srtfsm.sv             |  10 +-
 pipelined/src/fpu/srtpreproc.sv         |  16 +-
 pipelined/testbench/testbench-fp.sv     |  14 +-
 17 files changed, 203 insertions(+), 424 deletions(-)
 delete mode 100644 pipelined/src/fpu/resultselect.sv

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index b92bc07a..cc8d1b2b 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -32,14 +32,14 @@
 `define DESIGN_COMPILER 0
 
 // RV32 or RV64: XLEN = 32 or 64
-`define XLEN 64
+`define XLEN 32
 
 // IEEE 754 compliance
 `define IEEE754 0
 
 // MISA RISC-V configuration per specification
 //                    ZYXWVUTSRQPONMLKJIHGFEDCBA
-`define MISA 32'b0000000000101000001000100100101
+`define MISA 32'b0000000000101000001000100101101
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 671f7343..c064783c 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -95,12 +95,22 @@
 
 // largest length in IEU/FPU
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
 `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
 `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
 
+// division constants
+`define RADIX 4
+`define DIVCOPIES 4
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
+`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
+`define LOGR ((`RADIX==2) ? 1 : 2)
+`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES))
+`define DURLEN ($clog2($rtoi(`FPDUR)+1))
+`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES)
+
+
 `define USE_SRAM 0
 
 // Disable spurious Verilator warnings
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 9e7ba49b..58f782bd 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -11,7 +11,7 @@ add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/DivBusy
 add wave -noupdate /testbenchfp/srtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
@@ -21,8 +21,12 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
+add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
+add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
+add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
 add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 935ed3c1..a4f3feff 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -1,9 +1,9 @@
 `include "wally-config.vh"
 
 module divshiftcalc(
-    input logic  [`DIVLEN+2:0] Quot,
+    input logic  [`QLEN-1:0] Quot,
     input logic  [`FMTBITS-1:0] Fmt,
-    input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
+    input logic [`DURLEN-1:0] DivEarlyTermShift,
     input logic [`NE+1:0] DivCalcExp,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
@@ -32,9 +32,10 @@ module divshiftcalc(
     // inital Left shift amount  = NF
     assign NormShift = (`NE+2)'(`NF);
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
+    // need to multiply the early termination shift by LOGR*DIVCOPIES, i.e. left shift it by log2(LOGR*DIVCOPIES)
+    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)};
 
     // *** may be able to reduce shifter size
-    assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 086b97d8..c4f09aea 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -47,8 +47,8 @@ module divsqrt(
   output logic DivBusy,
   output logic DivDone,
   output logic [`NE+1:0] DivCalcExpM,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
-  output logic [`DIVLEN+2:0] QuotM
+  output logic [`DURLEN-1:0] EarlyTermShiftM,
+  output logic [`QLEN-1:0] QuotM
 //   output logic [`XLEN-1:0] RemM,
 );
 
@@ -57,12 +57,12 @@ module divsqrt(
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
   logic [`DIVLEN-1:0] Dpreproc;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
 
   srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 98250a45..4e16bc96 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -34,20 +34,20 @@ module flags(
     input logic                 XInf, YInf, ZInf,    // inputs are infinity
     input logic                 Plus1,
     input logic                 InfIn,                  // is a Inf input being used
+    input logic                 NaNIn,                  // is a NaN input being used
+    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic                 XZero, YZero,         // inputs are zero
     input logic                 XNaN, YNaN,           // inputs are NaN
-    input logic                 NaNIn,                  // is a NaN input being used
     input logic                 Sqrt,                   // Sqrt?
     input logic                 ToInt,                  // convert to integer
     input logic                 IntToFp,                // convert integer to floating point
     input logic                 Int64,                  // convert to 64 bit integer
     input logic                 Signed,                 // convert to a signed integer
-    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic [`NE:0]         CvtCe,            // the calculated expoent - Cvt
     input logic                 CvtOp,                  // conversion opperation?
     input logic                 DivOp,                  // conversion opperation?
     input logic                 FmaOp,                  // Fma opperation?
-    input logic  [`NE+1:0]      FullResExp,             // Re with bits to determine sign and overflow
+    input logic  [`NE+1:0]      FullRe,             // Re with bits to determine sign and overflow
     input logic  [`NE+1:0]      Nexp,               // exponent of the normalized sum
     input logic  [1:0]          CvtNegResMsbs,             // the negitive integer result's most significant bits
     input logic                 FmaAs, FmaPs,        // the product and modified Z signs
@@ -73,30 +73,30 @@ module flags(
 
 
    if (`FPSIZES == 1) begin
-        assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 2) begin    
-        assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
+        assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
 
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end else if (`FPSIZES == 3) begin
         always_comb
             case (OutFmt)
-                `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-                `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
-                `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
+                `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+                `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+                `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
                 default: ResExpGteMax = 1'bx;
             endcase
-            assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 4) begin        
         always_comb
             case (OutFmt)
-                `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
-                `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
-                `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
-                `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
+                `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
+                `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
+                `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
+                `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
             endcase
             // a left shift of intlen+1 is still in range but any more than that is an overflow
             //           inital: |      64 0's         |    XLEN     |
@@ -110,14 +110,14 @@ module flags(
             //      - any of the bits after the most significan 1 is one
             //      - the most signifcant in 65 or 33 is still a one in the number and
             //        one of the later bits is one
-            assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end
 
     //                 if the result is greater than or equal to the max exponent(not taking into account sign)
     //                 |           and the exponent isn't negitive
     //                 |           |                   if the input isnt infinity or NaN
     //                 |           |                   |            
-    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
+    assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
 
     // detecting tininess after rounding
     //                  the exponent is negitive
@@ -127,7 +127,7 @@ module flags(
     //                  |                    |                    |                                      |                     and if the result is not exact
     //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
     //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
+    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
@@ -153,7 +153,7 @@ module flags(
     //                  |           |                                  |                    |               or the res rounds up out of bounds
     //                  |           |                                  |                    |                       and the res didn't underflow
     //                  |           |                                  |                    |                       |
-    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
+    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
     //                                                                                                     |
     //                                                                                                     or when the positive res rounds up out of range
     assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 5f55e17b..d4898e80 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -37,7 +37,7 @@ module fmashiftcalc(
     input logic                         FmaKillProd,  // is the product set to zero
     input logic 			            ZDenorm,
     output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
-    output logic                        FmaSmZero,    // is the result denormalized - calculated before LZA corection
+    output logic                        FmaSZero,    // is the sum zero
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
@@ -50,7 +50,7 @@ module fmashiftcalc(
     ///////////////////////////////////////////////////////////////////////////////
     //*** insert bias-bias simplification in fcvt.sv/phone pictures
     // Determine if the sum is zero
-    assign FmaSmZero = ~(|FmaSm);
+    assign FmaSZero = ~(|FmaSm);
 
     // calculate the sum's exponent
     assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
@@ -90,7 +90,7 @@ module fmashiftcalc(
         logic Sum0LEZ, Sum0GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
+        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
 
     end else if (`FPSIZES == 2) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
@@ -98,7 +98,7 @@ module fmashiftcalc(
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
         assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
         assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
-        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
+        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
 
     end else if (`FPSIZES == 3) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@@ -110,9 +110,9 @@ module fmashiftcalc(
         assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
+                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
                 default: FmaPreResultDenorm = 1'bx;
             endcase
         end
@@ -129,10 +129,10 @@ module fmashiftcalc(
         assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
-                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
+                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
+                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
             endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
         end
 
@@ -144,7 +144,7 @@ module fmashiftcalc(
     //      - if kill prod dont add to exp
 
     // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
+    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 8336c39c..e1c9e5fa 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -125,12 +125,12 @@ module fpu (
    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`DIVLEN+2:0] QuotE, QuotM;
+   logic [`QLEN-1:0] QuotM;
    logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
    logic DivNegStickyE, DivNegStickyM;
    logic DivStickyE, DivStickyM;
    logic DivDoneM;
-   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
+   logic [`DURLEN-1:0] EarlyTermShiftM;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -289,7 +289,7 @@ module fpu (
    divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
                   .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
-                  .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
+                  .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
    // other FP execution units
    fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
             .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@@ -381,12 +381,12 @@ module fpu (
 
    assign FpLoadStoreM = FResSelM[1];
 
-   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
+   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
                            .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
                            .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
                            .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
                            .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
-                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
+                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
    mux2  #(5)  FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv
index 03b36f4f..17db0c0b 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@@ -38,7 +38,7 @@ module lzacorrection(
     input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     input logic                     FmaKillProd,  // is the product set to zero
-    input logic                     FmaSmZero,
+    input logic                     FmaSZero,
     output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
     output logic [`NE+1:0]          DivCorrExp,
     output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
@@ -59,7 +59,7 @@ module lzacorrection(
     assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
+    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index e165e7e1..18452abd 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -54,12 +54,12 @@ module postprocess(
     input logic                             FmaInvA,      // do you invert Z
     input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
     //divide signals
-    input logic  [$clog2(`DIVLEN/2+3)-1:0]  DivEarlyTermShiftDiv2,
+    input logic  [`DURLEN-1:0]              DivEarlyTermShift,
     input logic                             DivSticky,
     input logic                             DivNegSticky,
     input logic                             DivDone,
     input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`DIVLEN+2:0]              Quot,
+    input logic  [`QLEN-1:0]                Quot,
     // conversion signals
     input logic                             CvtCs,     // the result's sign
     input logic  [`NE:0]                    CvtCe,    // the calculated expoent
@@ -69,7 +69,7 @@ module postprocess(
     input logic  [`CVTLEN-1:0]              CvtLzcIn,      // input to the Leading Zero Counter (priority encoder)
     input logic                             IntZero,         // is the input zero
     // final results
-    output logic [`FLEN-1:0]                W,    // FMA final result
+    output logic [`FLEN-1:0]                PostProcRes,    // final post-processed result
     output logic [4:0]                      PostProcFlg,
     output logic [`XLEN-1:0]                FCvtIntRes    // the int conversion result
     );
@@ -81,7 +81,7 @@ module postprocess(
     logic Nsgn;
     logic [`NE+1:0] Nexp;
     logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
-    logic [`NE+1:0] FullResExp;  // Re with bits to determine sign and overflow
+    logic [`NE+1:0] FullRe;  // Re with bits to determine sign and overflow
     logic S;           // S bit
     logic UfPlus1;                    // do you add one (for determining underflow flag)
     logic R;   // bits needed to determine rounding
@@ -95,7 +95,7 @@ module postprocess(
     logic [`FMTBITS-1:0] OutFmt;
     // fma signals
     logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
-    logic FmaSmZero;        // is the sum zero
+    logic FmaSZero;        // is the sum zero
     logic [3*`NF+8:0] FmaShiftIn;        // shift input
     logic [`NE+1:0] FmaConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
@@ -153,8 +153,8 @@ module postprocess(
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
-                          .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+                          .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSel)
@@ -185,7 +185,7 @@ module postprocess(
 
     lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
                                 .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
-                                .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
+                                .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -204,14 +204,14 @@ module postprocess(
     round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
                 .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
                 .DivSticky, .DivNegSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
     ///////////////////////////////////////////////////////////////////////////////
 
     resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
-                          .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
@@ -220,7 +220,7 @@ module postprocess(
     flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                 .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
                 .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
-                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
+                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
                 .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
 
     ///////////////////////////////////////////////////////////////////////////////
@@ -228,10 +228,10 @@ module postprocess(
     ///////////////////////////////////////////////////////////////////////////////
 
     negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
-    resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+    specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
         .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
         .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
         .XInf, .YInf, .DivOp,
-        .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
+        .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
 
 endmodule
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
deleted file mode 100644
index 4389056f..00000000
--- a/pipelined/src/fpu/resultselect.sv
+++ /dev/null
@@ -1,290 +0,0 @@
-///////////////////////////////////////////
-//
-// Written: me@KatherineParry.com
-// Modified: 7/5/2022
-//
-// Purpose: special case selection
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module resultselect(
-    input logic                 Xs,        // input signs
-    input logic  [`NF:0]        Xm, Ym, Zm, // input mantissas
-    input logic                 XNaN, YNaN, ZNaN,    // inputs are NaN
-    input logic  [2:0]          Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic  [`FMTBITS-1:0] OutFmt,       // output format
-    input logic                 InfIn,
-    input logic                 XInf, YInf,
-    input logic                 XZero,
-    input logic                 IntZero,
-    input logic                 NaNIn,
-    input logic                 IntToFp,
-    input logic                 Int64,
-    input logic                 Signed,
-    input logic                 CvtOp,
-    input logic                 DivOp,
-    input logic                 FmaOp,
-    input logic                 Plus1,
-    input logic                 DivByZero,
-    input logic  [`NE:0]        CvtCe,    // the calculated expoent
-    input logic                 Ws,  // the res's sign
-    input logic                 IntInvalid, Invalid, Overflow,  // flags
-    input logic                 CvtResUf,
-    input logic  [`NE-1:0]      Re,          // Res exponent
-    input logic  [`NE+1:0]      FullResExp,          // Res exponent
-    input logic  [`NF-1:0]      Rf,         // Res fraction
-    input logic  [`XLEN+1:0]    CvtNegRes,     // the negation of the result
-    output logic [`FLEN-1:0]    W,     // final res
-    output logic [`XLEN-1:0]    FCvtIntRes     // final res
-);
-    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
-    logic OfResMax;
-    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
-    logic KillRes;
-    logic SelOfRes;
-
-
-    // does the overflow result output the maximum normalized floating point number
-    //                output infinity if the input is infinity
-    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws));
-
-    if (`FPSIZES == 1) begin
-
-        //NaN res selection depending on standard
-        if(`IEEE754) begin
-            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end else begin
-            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end
-
-        assign OfRes =  OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
-        assign NormRes = {Ws, Re, Rf};
-
-    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
-        if(`IEEE754) begin
-            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end else begin 
-            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end
-        
-        assign OfRes =  OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
-                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-        assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {Ws, Re, Rf};
-                end
-                `FMT1: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end
-                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
-                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
-                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
-                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)};
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
-                end
-                default: begin
-                    if(`IEEE754) begin
-                        XNaNRes = (`FLEN)'(0);
-                        YNaNRes = (`FLEN)'(0);
-                        ZNaNRes = (`FLEN)'(0);
-                        InvalidRes = (`FLEN)'(0);
-                    end else begin 
-                        InvalidRes = (`FLEN)'(0);
-                    end
-                    OfRes = (`FLEN)'(0);
-                    UfRes = (`FLEN)'(0);
-                    NormRes = (`FLEN)'(0);
-                end
-            endcase
-
-    end else if (`FPSIZES == 4) begin 
-        always_comb
-            case (OutFmt)
-                2'h3: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {Ws, Re, Rf};
-                end
-                2'h1: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
-                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
-                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
-                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end
-                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
-                end
-                2'h0: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
-                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
-                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
-                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
-                end
-                2'h2: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
-                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
-                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
-                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)};      
-	            // zero is exact fi dividing by infinity so don't add 1
-                    UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
-                end
-            endcase
-
-    end
-
-    
-
-
-
-    // determine if you shoould kill the res - Cvt
-    //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
-    //      - dont set to zero if fp input is zero but not using the fp input
-    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
-    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
-    // output infinity with result sign if divide by zero
-    if(`IEEE754) begin
-        assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
-                         YNaN&~CvtOp ? YNaNRes :
-                         ZNaN&FmaOp ? ZNaNRes :
-                         Invalid ? InvalidRes : 
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end else begin
-        assign W = NaNIn|Invalid ? InvalidRes :
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end
-
-    ///////////////////////////////////////////////////////////////////////////////////////
-    //
-    //      |||||||||||   |||     |||   |||||||||||||
-    //          |||       ||||||  |||        |||
-    //          |||       ||| ||| |||        |||
-    //          |||       |||  ||||||        |||
-    //      |||||||||||   |||     |||        |||
-    //
-    ///////////////////////////////////////////////////////////////////////////////////////        
-
-    // *** probably can optimize the negation
-    // select the overflow integer res
-    //      - negitive infinity and out of range negitive input
-    //                 |  int  |  long  |
-    //          signed | -2^31 | -2^63  |
-    //        unsigned |   0   |    0   |
-    //
-    //      - positive infinity and out of range positive input and NaNs
-    //                 |   int  |  long  |
-    //          signed | 2^31-1 | 2^63-1 |
-    //        unsigned | 2^32-1 | 2^64-1 |
-    //
-    //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
-
-
-    // select the integer output
-    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
-    //      - if the input underflows
-    //          - if rounding and signed opperation and negitive input, output -1
-    //          - otherwise output a rounded 0
-    //      - otherwise output the normal res (trmined and sign extended if nessisary)
-    assign FCvtIntRes = IntInvalid ?  OfIntRes :
-			            CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
-endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index 05c3b461..e6de0c18 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -35,7 +35,7 @@ module resultsign(
     input logic         InfIn,
     input logic         FmaOp,
     input logic [`NE+1:0] FmaSe,
-    input logic         FmaSmZero,
+    input logic         FmaSZero,
     input logic         Mult,
     input logic         R,
     input logic         S,
@@ -61,6 +61,6 @@ module resultsign(
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
     assign InfSgn = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
+    assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 4c185ff3..c73edc08 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -57,7 +57,7 @@ module round(
     input logic                     DivSticky,             // sticky bit
     input logic                     DivNegSticky,
     output logic                    UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]          FullResExp,      // Re with bits to determine sign and overflow
+    output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
     output logic [`NF-1:0]          Rf,         // Result fraction
     output logic [`NE-1:0]          Re,          // Result exponent
     output logic                    S,             // sticky bit
@@ -344,8 +344,8 @@ module round(
 
     // round the result
     //      - if the fraction overflows one should be added to the exponent
-    assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
-    assign Re = FullResExp[`NE-1:0];
+    assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd;
+    assign Re = FullRe[`NE-1:0];
 
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
index 741d4e83..1c7b9648 100644
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@@ -30,7 +30,7 @@
 
 `include "wally-config.vh"
 
-module srtradix4 (
+module srtradix4(
   input  logic clk,
   input  logic DivStart, 
   input  logic DivBusy, 
@@ -40,20 +40,29 @@ module srtradix4 (
   input logic [`DIVLEN-1:0] X,
   input logic [`DIVLEN-1:0] Dpreproc,
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [`DIVLEN+2:0] Quot,
+  output logic [`QLEN-1:0] Quot,
   output logic [`DIVLEN+3:0]  WSN, WCN,
-  output logic [`DIVLEN+3:0]  WS, WC,
+  output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
   output logic  [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
 );
 
-  logic [3:0]     q;
-  logic [`DIVLEN+3:0]  WSA;
-  logic [`DIVLEN+3:0]  WCA;
-  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
+
+ /* verilator lint_off UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WCA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WS[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WC[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
+ /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
   logic [`NE+1:0] DivCalcExp;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
+  logic [`QLEN-1:0] QMux, QMMux;
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -63,47 +72,43 @@ module srtradix4 (
   //  - otherwise load WSA into the flipflop
   //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
   //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
-  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
-  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
+  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
   flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
 
-  // Quotient Selection logic
-  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  // *** change this for radix 4 - generate w/ stine code
-  // q encoding:
-	// 1000 = +2
-	// 0100 = +1
-	// 0000 =  0
-	// 0010 = -1
-	// 0001 = -2
-  qsel4 qsel4(.D, .WS, .WC, .q);
 
-  // Divisor Selection logic
-  // *** radix 4 change to choose -2 to 2
+  // Divisor Selections
   // - choose the negitive version of what's being selected
   assign DBar = ~D;
   assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
   assign D2 = {D[`DIVLEN+2:0], 1'b0};
 
-  always_comb
-    case (q)
-      4'b1000: Dsel = DBar2;
-      4'b0100: Dsel = DBar;
-      4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
-      4'b0010: Dsel = D;
-      4'b0001: Dsel = D2;
-      default: Dsel = {`DIVLEN+4{1'bx}};
-    endcase
+  genvar i; // index of the unrolled divider copy
+  generate // instantiate DIVCOPIES chained radix-4 iterations; copy 0 is fed by the WS/WC/Q/QM registers
+    for(i=0; i<`DIVCOPIES; i++) begin
+      divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, 
+      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
+      if(i<(`DIVCOPIES-1)) begin // chain every copy but the last; bound follows DIVCOPIES so index i+1 stays inside [DIVCOPIES-1:0]
+        assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; // next copy's residual sum: this copy's result shifted left by 2
+        assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; // next copy's residual carry: same 2-bit left shift
+        assign Q[i+1] = QNext[i]; // quotient built so far is handed to the next copy
+        assign QM[i+1] = QMNext[i]; // likewise for quotient-1
+      end // the last copy's WSA/WCA/QNext/QMNext are consumed by the muxes feeding the registers
+    end
+  endgenerate
 
-  // Partial Product Generation
-  //  WSA, WCA = WS + WC - qD
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
-  
-  //*** change for radix 4
-  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
+  // if starting a new division set Q to 0 and QM to -1
+  mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
+  mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
+  flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage
+  flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
+
+  assign Quot = Q[0];
+  assign FirstWS = WS[0];
+  assign FirstWC = WC[0];
 
   expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
 
@@ -113,7 +118,50 @@ endmodule
 // Submodules //
 ////////////////
 
+ /* verilator lint_off UNOPTFLAT */
+module divinteration ( // one radix-4 SRT step: select a quotient digit, update the carry-save residual, extend Q and QM
+  input logic clk, // clk, DivStart and DivBusy are only forwarded to otfc4 in this module
+  input logic DivStart,
+  input logic DivBusy,
+  input logic [`DIVLEN+3:0] D, // divisor
+  input logic [`DIVLEN+3:0]  DBar, D2, DBar2, // candidate addends muxed into Dsel below
+  input logic [`QLEN-1:0] Q, QM, // quotient so far and quotient-1, inputs to the on-the-fly conversion
+  input logic [`DIVLEN+3:0]  WS, WC, // incoming residual in carry-save form (sum, carry)
+  output logic [`QLEN-1:0] QNext, QMNext, // Q and QM after this step's digit has been appended
+  output logic [`DIVLEN+3:0]  WSA, WCA // csa outputs: WS + WC - q*D, still in carry-save form (not yet shifted)
+);
+ /* verilator lint_on UNOPTFLAT */
 
+  logic [`DIVLEN+3:0]  Dsel; // addend chosen by the selected digit
+  logic [3:0]     q; // selected quotient digit, encoded as listed below
+
+  // Quotient Selection logic
+  // Given partial remainder, select a radix-4 quotient digit of +2, +1, 0, -1, or -2
+  // q encoding:
+	// 1000 = +2
+	// 0100 = +1
+	// 0000 =  0
+	// 0010 = -1
+	// 0001 = -2
+  qsel4 qsel4(.D, .WS, .WC, .q);
+
+  always_comb
+    case (q)
+      4'b1000: Dsel = DBar2; // digit +2
+      4'b0100: Dsel = DBar; // digit +1
+      4'b0000: Dsel = {`DIVLEN+4{1'b0}}; // digit 0: residual is unchanged by the csa
+      4'b0010: Dsel = D; // digit -1
+      4'b0001: Dsel = D2; // digit -2
+      default: Dsel = {`DIVLEN+4{1'bx}}; // any other encoding is not a listed digit; drive x
+    endcase
+
+  // Partial Product Generation
+  //  WSA, WCA = WS + WC - qD; |q[3:2] is set for +1/+2 (presumably csa's carry-in completing the negation of D - confirm in csa)
+  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
+
+  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext);
+
+endmodule
 
 module qsel4 (
 	input logic [`DIVLEN+3:0] D,
@@ -195,7 +243,8 @@ module otfc4 (
   input  logic         DivStart,
   input  logic         DivBusy,
   input  logic [3:0]   q,
-  output logic [`DIVLEN+2:0] Quot
+  input logic [`QLEN-1:0] Q, QM,
+  output logic [`QLEN-1:0] QNext, QMNext
 );
 
   //  The on-the-fly converter transfers the quotient 
@@ -207,16 +256,11 @@ module otfc4 (
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
+
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
-  logic [`DIVLEN:0] QR, QMR;
-  // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
-  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
-  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
+  logic [`QLEN-3:0] QR, QMR;
 
   // shift Q (quotent) and QM (quotent-1)
 		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
@@ -227,8 +271,8 @@ module otfc4 (
     // *** how does the 0 concatination numbers work?
 
   always_comb begin
-    QR  = Quot[`DIVLEN:0];
-    QMR = QM[`DIVLEN:0];     // Shift Q and QM
+    QR  = Q[`QLEN-3:0];
+    QMR = QM[`QLEN-3:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 008b234d..fc73cf71 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -40,8 +40,8 @@ module srtfsm(
   input  logic DivStart, 
   input logic StallE,
   input logic StallM,
-  input  logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
+  input  logic [`DURLEN-1:0] Dur,
+  output logic [`DURLEN-1:0] EarlyTermShiftE,
   output logic DivStickyE,
   output logic DivDone,
   output logic DivNegStickyE,
@@ -51,7 +51,7 @@ module srtfsm(
   typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
   statetype state;
 
-  logic [$clog2(`DIVLEN/2+3)-1:0] step;
+  logic [`DURLEN-1:0] step;
   logic WZero;
   //logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
   logic [`DIVLEN+3:0] W;
@@ -63,7 +63,7 @@ module srtfsm(
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
-  assign EarlyTermShiftDiv2E = step;
+  assign EarlyTermShiftE = step;
 
   always_ff @(posedge clk) begin
       if (reset) begin
@@ -73,7 +73,7 @@ module srtfsm(
           if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
           else         state <= #1 BUSY;
       end else if (state == BUSY) begin
-          if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
+          if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
               state <= #1 DONE;
           end
           step <= step - 1;
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index d17d2abd..fa76c051 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -35,7 +35,7 @@ module srtpreproc (
   output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
+  output logic [`DURLEN-1:0] Dur
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
@@ -63,10 +63,20 @@ module srtpreproc (
   
   assign X = PreprocX;
   assign Dpreproc = PreprocY;
-
-  assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
+  
+  assign Dur = (`DURLEN)'($rtoi(`FPDUR));
   // assign intExp = zeroCntB - zeroCntA + 1;
   // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
+  //           radix 2         radix 4
+  // 1 copy    (DIVLEN+2)      (DIVLEN+2)/2
+  // 2 copies  (DIVLEN+2)/2    (DIVLEN+2)/(2*2)
+  // 4 copies  (DIVLEN+2)/4    (DIVLEN+2)/(2*4)
+  // 8 copies  (DIVLEN+2)/8    (DIVLEN+2)/(2*8)
+
+  // DIVRESLEN = DIVLEN or DIVLEN+2
+  // r = 1 or 2
+  // DIVRESLEN/(r*`DIVCOPIES)
+
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index fa46a060..ba14499e 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -80,9 +80,9 @@ module testbenchfp;
   logic CvtResSgnE;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`DIVLEN+2:0] Quot;
+	logic [`QLEN-1:0] Quot;
   logic CvtResDenormUfE;
-  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
+  logic [`DURLEN-1:0] EarlyTermShift;
   logic DivStart, DivBusy;
   logic reset = 1'b0;
   logic [`DIVLEN-1:0]    DivX;
@@ -90,7 +90,7 @@ module testbenchfp;
   logic [`DIVLEN+3:0]  WSN, WS;
   logic [`DIVLEN+3:0]  WCN, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
 
   // in-between FMA signals
   logic                 Mult;
@@ -686,8 +686,8 @@ module testbenchfp;
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
               .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
-              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-              .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
+              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   
   fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), 
             .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
@@ -697,8 +697,8 @@ module testbenchfp;
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
   srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
   srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
                 .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
 
   assign CmpFlg[3:0] = 0;