diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index b92bc07a..8f13b2e3 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -39,7 +39,7 @@
 
 // MISA RISC-V configuration per specification
 //                    ZYXWVUTSRQPONMLKJIHGFEDCBA
-`define MISA 32'b0000000000101000001000100100101
+`define MISA 32'b0000000000101000001000100101101
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 671f7343..015ef261 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -95,11 +95,25 @@
 
 // largest length in IEU/FPU
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
-`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
+`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
+
+// division constants
+`define RADIX 32'h2
+`define DIVCOPIES 32'h1
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
+`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
+`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
+`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
+// FPDUR = ceil(DIVLEN/(LOGR*DIVCOPIES)) + (RADIX/4)
+// one iteration is required for the integer bit for minimally redundant radix-4
+`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
+`define DURLEN ($clog2(`FPDUR+1))
+`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
+
 
 `define USE_SRAM 0
 
diff --git a/pipelined/regression/sim-testfloat b/pipelined/regression/sim-testfloat
index 18f174a9..25fe09a1 100755
--- a/pipelined/regression/sim-testfloat
+++ b/pipelined/regression/sim-testfloat
@@ -6,7 +6,7 @@
 # fma    - test fma
 # sub    - test subtraction
 # div    - test division
-# sqrt   - test square ro
+# sqrt   - test square root
 # all    - test everything
 
-vsim -do "do testfloat.do rv64fp mul"
+vsim -do "do testfloat.do rv64fp $1"
diff --git a/pipelined/regression/sim-testfloat-batch b/pipelined/regression/sim-testfloat-batch
index f1178f1d..c7f28a55 100755
--- a/pipelined/regression/sim-testfloat-batch
+++ b/pipelined/regression/sim-testfloat-batch
@@ -1,7 +1,9 @@
+
 # cvtint - test integer conversion unit (fcvtint)
 # cvtfp  - test floating-point conversion unit (fcvtfp)
 # cmp    - test comparison unit's LT, LE, EQ opperations (fcmp)
 # add    - test addition
+# fma    - test fma
 # sub    - test subtraction
 # div    - test division
 # sqrt   - test square root
diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally
index 06985148..6163ab8b 100755
--- a/pipelined/regression/sim-wally
+++ b/pipelined/regression/sim-wally
@@ -1,2 +1,2 @@
-vsim -do "do wally-pipelined.do rv32gc arch32i"
+vsim -do "do wally-pipelined.do rv32gc wally32periph"
 
diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch
index 91f11697..7e821e58 100755
--- a/pipelined/regression/sim-wally-batch
+++ b/pipelined/regression/sim-wally-batch
@@ -1 +1 @@
-vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f"
+vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 9e7ba49b..98c72f17 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -9,22 +9,31 @@ add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/DivBusy
-add wave -noupdate /testbenchfp/srtfsm/state
+add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
+# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
+# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index f6aad78e..ca6a5c9c 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -42,10 +42,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   input logic [11:0]          NextAdr, // virtual address, but we only use the lower 12 bits.
   input logic [`PA_BITS-1:0]  PAdr, // physical address
   input logic [(`XLEN-1)/8:0] ByteMask,
-  input logic [`XLEN-1:0]     FinalWriteData,
-  input logic [`FLEN-1:0]     FWriteDataM,
-  input logic                        FLoad2,
-  input logic                 FpLoadStoreM,
+  input logic [WORDLEN-1:0]     FinalWriteData,
+  input logic                        FStore2,
   output logic                CacheCommitted,
   output logic                CacheStall,
    // to performance counters to cpu
@@ -72,7 +70,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   localparam                  SETLEN = $clog2(NUMLINES);
   localparam                  SETTOP = SETLEN+OFFSETLEN;
   localparam                  TAGLEN = `PA_BITS - SETTOP;
-  localparam                  WORDSPERLINE = LINELEN/`XLEN;
+  localparam                  WORDSPERLINE = LINELEN/WORDLEN;
   localparam                  FlushAdrThreshold   = NUMLINES - 1;
 
   logic                       SelAdr;
@@ -123,7 +121,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
 
   // Array of cache ways, along with victim, hit, dirty, and read merging logic
   cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) 
-    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
+    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2,
     .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
     .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, 
     .Invalidate(InvalidateCacheM));
@@ -162,12 +160,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
   /////////////////////////////////////////////////////////////////////////////////////////////
-  if (`LLEN>`XLEN)
-    mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
-      .d1({WORDSPERLINE/2{FWriteDataM}}),	.d2(CacheBusWriteData),	.s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
-  else
-    mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
-      .d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
+  mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
+  .d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
   mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),
diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv
index cbaf915d..192fb9ac 100644
--- a/pipelined/src/cache/cacheway.sv
+++ b/pipelined/src/cache/cacheway.sv
@@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   input logic [$clog2(NUMLINES)-1:0] RAdr,
   input logic [`PA_BITS-1:0]         PAdr,
   input logic [LINELEN-1:0]          CacheWriteData,
-  input logic                        FLoad2,
+  input logic                        FStore2,
   input logic                        SetValidWay,
   input logic                        ClearValidWay,
   input logic                        SetDirtyWay,
@@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
     logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
     onehotdecoder #(LOGWPL) adrdec(
       .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
-    assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
+    assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0};
   end else
     onehotdecoder #(LOGWPL) adrdec(
       .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 935ed3c1..3fbc9419 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -1,10 +1,10 @@
 `include "wally-config.vh"
 
 module divshiftcalc(
-    input logic  [`DIVLEN+2:0] Quot,
+    input logic  [`QLEN-1-(`RADIX/4):0] DivQm,
     input logic  [`FMTBITS-1:0] Fmt,
-    input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
-    input logic [`NE+1:0] DivCalcExp,
+    input logic [`DURLEN-1:0] DivEarlyTermShift,
+    input logic [`NE+1:0] DivQe,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
     output logic DivResDenorm,
@@ -14,27 +14,28 @@ module divshiftcalc(
 
     // is the result denromalized
     // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
-    assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
+    assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
 
     // if the result is denormalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  .00xxxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = +1
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  .00xxxxxxxxxxxxx... << DivQe+NF+1  Exp = +1
     //  .0000xxxxxxxxxxx... >> 1                Exp = 1
-    // Left shift amount  = DivCalcExp+NF+1-1
-    assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
+    // Left shift amount  = DivQe+NF+1-1
+    assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
     // if the result is normalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  00000000.xxxxxxx... << NF               Exp = DivCalcExp+1
-    //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
-    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  00000000.xxxxxxx... << NF               Exp = DivQe+1
+    //  00000000x.xxxxxx... << NF               Exp = DivQe (extra shift done afterwards)
+    //  00000000xx.xxxxx... << 1?               Exp = DivQe-1 (determined after)
     // inital Left shift amount  = NF
+    // shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
     assign NormShift = (`NE+2)'(`NF);
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
+    // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
+    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
 
-    // *** may be able to reduce shifter size
-    assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 086b97d8..cbf7f95f 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -43,26 +43,27 @@ module divsqrt(
   input  logic StallM,
   input logic StallE,
   output logic DivStickyM,
-  output logic DivNegStickyM,
   output logic DivBusy,
   output logic DivDone,
   output logic [`NE+1:0] DivCalcExpM,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
-  output logic [`DIVLEN+2:0] QuotM
+  output logic [`DURLEN-1:0] EarlyTermShiftM,
+  output logic [`QLEN-1-(`RADIX/4):0] QuotM
 //   output logic [`XLEN-1:0] RemM,
 );
 
-  logic [`DIVLEN+3:0]  WSN, WCN;
+  logic [`DIVLEN+3:0]  NextWSN, NextWCN;
   logic [`DIVLEN+3:0]  WS, WC;
+  logic [`DIVLEN+3:0] StickyWSA;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
   logic [`DIVLEN-1:0] Dpreproc;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
+  logic NegSticky;
 
-  srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
-                .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+               .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
+  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
+                .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 2d9fc21c..b9932523 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -68,7 +68,8 @@ module fcvt (
     logic                   Signed;     // is the opperation with a signed integer?
     logic                   Int64;      // is the integer 64 bits?
     logic                   IntToFp;       // is the opperation an int->fp conversion?
-    logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
+    logic [`CVTLEN:0]       LzcInFull;      // input to the Leading Zero Counter (priority encoder)
+    logic [`LOGCVTLEN-1:0]  LeadingZeros; // output from the LZC
 
 
     // seperate OpCtrl for code readability
@@ -102,10 +103,11 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
-                             {Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
+    assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+                             {Xm, {`CVTLEN-`NF{1'b0}}};
+    assign LzcIn = LzcInFull[`CVTLEN-1:0];
     
-    lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
+    lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
 
     ///////////////////////////////////////////////////////////////////////////
     // shifter
@@ -119,13 +121,13 @@ module fcvt (
     //      denormalized/undeflowed result fp -> fp:
     //          - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
     //      ??? -> fp: 
-    //          - shift left by LeadingZeros+1 - to shift till the result is normalized
+    //          - shift left by LeadingZeros - to shift till the result is normalized
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
     assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
                     ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
+                              (LeadingZeros);
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -197,14 +199,14 @@ module fcvt (
     //                  |  0's |     Mantissa      |      0's if nessisary     |
     //                  |     keep        |
     //
-    //              - if the input is denormalized then we dont shift... so the  "- (LeadingZeros+1)" is just leftovers from other options
-    //      int -> fp : largest bias +  XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
+    //              - if the input is denormalized then we don't shift... so the "- LeadingZeros" is just left over from other options
+    //      int -> fp : largest bias +  XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
     //              Process:
     //                  - shifted right by XLEN (XLEN)
-    //                  - shift left to normilize (-1-LeadingZeros)
+    //                  - shift left to normilize (-LeadingZeros)
     //                  - newBias to make the biased exponent
-    //          oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
-    assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
+    //          oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
+    assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
     // find if the result is dnormal or underflows
     //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
     //      - can't underflow an integer to Fp conversion
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 98250a45..6b1bc638 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -34,24 +34,24 @@ module flags(
     input logic                 XInf, YInf, ZInf,    // inputs are infinity
     input logic                 Plus1,
     input logic                 InfIn,                  // is a Inf input being used
+    input logic                 NaNIn,                  // is a NaN input being used
+    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic                 XZero, YZero,         // inputs are zero
     input logic                 XNaN, YNaN,           // inputs are NaN
-    input logic                 NaNIn,                  // is a NaN input being used
     input logic                 Sqrt,                   // Sqrt?
     input logic                 ToInt,                  // convert to integer
     input logic                 IntToFp,                // convert integer to floating point
     input logic                 Int64,                  // convert to 64 bit integer
     input logic                 Signed,                 // convert to a signed integer
-    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic [`NE:0]         CvtCe,            // the calculated expoent - Cvt
     input logic                 CvtOp,                  // conversion opperation?
     input logic                 DivOp,                  // conversion opperation?
     input logic                 FmaOp,                  // Fma opperation?
-    input logic  [`NE+1:0]      FullResExp,             // Re with bits to determine sign and overflow
-    input logic  [`NE+1:0]      Nexp,               // exponent of the normalized sum
+    input logic  [`NE+1:0]      FullRe,             // Re with bits to determine sign and overflow
+    input logic  [`NE+1:0]      Me,               // exponent of the normalized sum
     input logic  [1:0]          CvtNegResMsbs,             // the negitive integer result's most significant bits
     input logic                 FmaAs, FmaPs,        // the product and modified Z signs
-    input logic                 R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
+    input logic                 R, UfL, S, UfPlus1, // bits used to determine rounding
     output logic                DivByZero,
     output logic                IntInvalid, Invalid, Overflow, // flags used to select the res
     output logic [4:0]          PostProcFlg // flags
@@ -73,30 +73,30 @@ module flags(
 
 
    if (`FPSIZES == 1) begin
-        assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 2) begin    
-        assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
+        assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
 
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end else if (`FPSIZES == 3) begin
         always_comb
             case (OutFmt)
-                `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-                `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
-                `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
+                `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+                `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+                `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
                 default: ResExpGteMax = 1'bx;
             endcase
-            assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 4) begin        
         always_comb
             case (OutFmt)
-                `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
-                `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
-                `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
-                `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
+                `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
+                `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
+                `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
+                `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
             endcase
             // a left shift of intlen+1 is still in range but any more than that is an overflow
             //           inital: |      64 0's         |    XLEN     |
@@ -110,14 +110,14 @@ module flags(
             //      - any of the bits after the most significan 1 is one
             //      - the most signifcant in 65 or 33 is still a one in the number and
             //        one of the later bits is one
-            assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end
 
     //                 if the result is greater than or equal to the max exponent(not taking into account sign)
     //                 |           and the exponent isn't negitive
     //                 |           |                   if the input isnt infinity or NaN
     //                 |           |                   |            
-    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
+    assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
 
     // detecting tininess after rounding
     //                  the exponent is negitive
@@ -127,11 +127,11 @@ module flags(
     //                  |                    |                    |                                      |                     and if the result is not exact
     //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
     //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
+    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
-    assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
+    assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);
 
     //                  if the res is too small to be represented and not 0
     //                  |                                     and if the res is not invalid (outside the integer bounds)
@@ -153,7 +153,7 @@ module flags(
     //                  |           |                                  |                    |               or the res rounds up out of bounds
     //                  |           |                                  |                    |                       and the res didn't underflow
     //                  |           |                                  |                    |                       |
-    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
+    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
     //                                                                                                     |
     //                                                                                                     or when the positive res rounds up out of range
     assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 57b053da..44cd3616 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -51,7 +51,6 @@ module fma(
     logic [3*`NF+5:0]   Am;     // addend aligned's mantissa for addition in U(NF+5.2NF+1)
     logic [3*`NF+6:0]   AmInv;   // aligned addend's mantissa possibly inverted
     logic [2*`NF+1:0]   PmKilled;      // the product's mantissa possibly killed
-    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
     ///////////////////////////////////////////////////////////////////////////////
     // Calculate the product
     //      - When multipliying two fp numbers, add the exponents
@@ -70,20 +69,21 @@ module fma(
     ///////////////////////////////////////////////////////////////////////////////
     // Alignment shifter
     ///////////////////////////////////////////////////////////////////////////////
-
-    align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
-                        .Am, .ZmSticky, .KillProd);
-                        
     // calculate the signs and take the opperation into account
     sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
 
+    align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
+                .Am, .ZmSticky, .KillProd);
+                        
+
+
     // ///////////////////////////////////////////////////////////////////////////////
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
     
-    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
+    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
 
 
@@ -172,7 +172,7 @@ module align(
     // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
     assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
     
-    assign KillProd = ACnt[`NE+1]|XZero|YZero;
+    assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
     assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
 
     always_comb
@@ -183,7 +183,7 @@ module align(
         //          |   54'b0    |  106'b(product)  | 2'b0 |
         //  | addnend |
         if (KillProd) begin
-            ZmShifted = ZmPreshifted;
+            ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
             ZmSticky = ~(XZero|YZero);
 
         // If the addend is too small to effect the addition        
@@ -221,14 +221,14 @@ module add(
     input logic  [2*`NF+1:0]    Pm,       // the product's mantissa
     input logic                 Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
     input logic                 KillProd,      // should the product be set to 0
-    input logic                 XZero, YZero, // is the input zero
+    input logic                 ZmSticky,
     output logic [3*`NF+6:0]    AmInv,  // aligned addend possibly inverted
     output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
     output logic                NegSum,        // was the sum negitive
     output logic                InvA,          // do you invert the aligned addend
-    output logic [3*`NF+5:0]    Sm,           // the positive sum
-    output logic [3*`NF+6:0]    PreSum, NegPreSum// possibly negitive sum
+    output logic [3*`NF+5:0]    Sm           // the positive sum
 );
+    logic [3*`NF+6:0]    PreSum, NegPreSum; // possibly negative sum
 
     ///////////////////////////////////////////////////////////////////////////////
     // Addition
@@ -243,13 +243,14 @@ module add(
     assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
     // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
     assign PmKilled = Pm&{2*`NF+2{~KillProd}};
-
-
-
     // Do the addition
     //      - calculate a positive and negitive sum in parallel
-    assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
-    assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
+    //              Zsticky             Psticky
+    // PreSum    -1 = don't add 1     +1 = add 2
+    // NegPreSum +1 = add 2           -1 = don't add 1
+    // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0
+    assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
+    assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
      
     // Is the sum negitive
     assign NegSum = PreSum[3*`NF+6];
@@ -261,7 +262,7 @@ endmodule
 
 module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
     input logic  [3*`NF+6:0] A,     // addend
-    input logic  [2*`NF+1:0] P,     // product
+    input logic  [2*`NF+3:0] P,     // product
     output logic [$clog2(3*`NF+7)-1:0]       NCnt   // normalization shift count for the positive result
     ); 
     
@@ -273,12 +274,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
     assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
     assign G[3*`NF+6:2*`NF+4] = 0;
     assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
-    assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
-    assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
-    assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
-    assign T[1:0] = A[1:0];
-    assign G[1:0] = 0;
-    assign Z[1:0] = ~A[1:0];
+    assign T[2*`NF+3:0] = A[2*`NF+3:0]^P;
+    assign G[2*`NF+3:0] = A[2*`NF+3:0]&P;
+    assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P;
 
 
     // Apply function to determine Leading pattern
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 5f55e17b..a6c1a1c6 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -35,9 +35,8 @@ module fmashiftcalc(
     input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
-    input logic 			            ZDenorm,
-    output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
-    output logic                        FmaSmZero,    // is the result denormalized - calculated before LZA corection
+    output logic [`NE+1:0]              FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
+    output logic                        FmaSZero,    // is the sum zero
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
@@ -50,35 +49,36 @@ module fmashiftcalc(
     ///////////////////////////////////////////////////////////////////////////////
     //*** insert bias-bias simplification in fcvt.sv/phone pictures
     // Determine if the sum is zero
-    assign FmaSmZero = ~(|FmaSm);
+    assign FmaSZero = ~(|FmaSm);
 
     // calculate the sum's exponent
-    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
+    //                                                                      ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
+    assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
-        assign FmaConvNormSumExp = NormSumExp;
+        assign FmaNe = NormSumExp;
 
     end else if (`FPSIZES == 2) begin
-        assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+        assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
 
     end else if (`FPSIZES == 3) begin
         always_comb begin
             case (Fmt)
-                `FMT: FmaConvNormSumExp = NormSumExp;
-                `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
-                `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
-                default: FmaConvNormSumExp = {`NE+2{1'bx}};
+                `FMT: FmaNe = NormSumExp;
+                `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+                `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
+                default: FmaNe = {`NE+2{1'bx}};
             endcase
         end
 
     end else if (`FPSIZES == 4) begin
         always_comb begin
             case (Fmt)
-                2'h3: FmaConvNormSumExp = NormSumExp;
-                2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
-                2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
-                2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+                2'h3: FmaNe = NormSumExp;
+                2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
+                2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
+                2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
             endcase
         end
 
@@ -90,7 +90,7 @@ module fmashiftcalc(
         logic Sum0LEZ, Sum0GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
+        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
 
     end else if (`FPSIZES == 2) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
@@ -98,7 +98,7 @@ module fmashiftcalc(
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
         assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
         assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
-        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
+        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
 
     end else if (`FPSIZES == 3) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@@ -110,9 +110,9 @@ module fmashiftcalc(
         assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
+                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
                 default: FmaPreResultDenorm = 1'bx;
             endcase
         end
@@ -129,10 +129,10 @@ module fmashiftcalc(
         assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
-                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
+                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
+                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
             endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
         end
 
@@ -144,13 +144,13 @@ module fmashiftcalc(
     //      - if kill prod dont add to exp
 
     // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
+    // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
-    assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift;
+    assign FmaShiftAmt = FmaNCnt+DenormShift;
 endmodule
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 8336c39c..65be2997 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -42,7 +42,7 @@ module fpu (
   input logic [1:0]        STATUS_FS, // Is floating-point enabled?
   output logic 		   FRegWriteM, // FP register write enable
   output logic 		   FpLoadStoreM, // Fp load instruction?
-  output logic              FLoad2,
+  output logic              FStore2,
   output logic 		   FStallD, // Stall the decode stage
   output logic 		   FWriteIntE, // integer register write enables
   output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
@@ -125,12 +125,11 @@ module fpu (
    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`DIVLEN+2:0] QuotE, QuotM;
+   logic [`QLEN-1-(`RADIX/4):0] QuotM;
    logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
-   logic DivNegStickyE, DivNegStickyM;
    logic DivStickyE, DivStickyM;
    logic DivDoneM;
-   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
+   logic [`DURLEN-1:0] EarlyTermShiftM;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -288,8 +287,8 @@ module fpu (
    //       .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
    divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
-                  .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
-                  .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
+                  .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
+                  .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
    // other FP execution units
    fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
             .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@@ -308,8 +307,8 @@ module fpu (
       assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
    end else begin
       logic [`FLEN-1:0] FWriteDataE;
-      if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
-      else assign FLoad2 = FmtM;
+      if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
+      else assign FStore2 = FmtM;
 
       if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
       else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
@@ -381,12 +380,12 @@ module fpu (
 
    assign FpLoadStoreM = FResSelM[1];
 
-   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
-                           .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
-                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
-                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
-                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
-                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
+   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
+                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM),
+                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
+                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
+                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
+                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
    mux2  #(5)  FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
new file mode 100644
index 00000000..8d11273a
--- /dev/null
+++ b/pipelined/src/fpu/otfc.sv
@@ -0,0 +1,112 @@
+///////////////////////////////////////////
+// otfc.sv
+//
+// Written: me@KatherineParry.com, cturek@hmc.edu 
+// Modified:7/14/2022
+//
+// Purpose: On the fly conversion
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module otfc2 (
+  input  logic         qp, qz,
+  input  logic [`QLEN-1:0] Q, QM,
+  output logic [`QLEN-1:0] QNext, QMNext
+);
+  //  Radix-2 on-the-fly converter: shifts one new quotient digit
+  //  (qp = +1, qz = 0, otherwise -1) into Q and QM each step.
+  //  Use this otfc for division only.
+  logic [`QLEN-2:0] QR, QMR;
+
+  assign QR  = Q[`QLEN-2:0];
+  assign QMR = QM[`QLEN-2:0];     // Q and QM with their top bit dropped, ready to take a new low bit
+
+  always_comb begin
+    if (qp) begin
+      QNext  = {QR,  1'b1};
+      QMNext = {QR,  1'b0};
+    end else if (qz) begin
+      QNext  = {QR,  1'b0};
+      QMNext = {QMR, 1'b1};
+    end else begin        // neither qp nor qz is set, so the digit is negative (qn)
+      QNext  = {QMR, 1'b1};
+      QMNext = {QMR, 1'b0};
+    end 
+  end
+
+endmodule
+
+
+module otfc4 (
+  input  logic [3:0]   q,  // digit select, tested in priority order: q[3]=+2, q[2]=+1, q[1]=-1, q[0]=-2, all clear = 0
+  input  logic [`QLEN-1:0] Q, QM,
+  output logic [`QLEN-1:0] QNext, QMNext
+);
+
+  //  The on-the-fly converter appends each new quotient
+  //  digit to the quotient as it is produced.
+  //
+  //  This code follows the pseudocode presented in the
+  //  floating point chapter of the book. Right now,
+  //  it is written for Radix-4 division.
+  //
+  //  QM is Q-1. It allows us to write negative digits
+  //  without using a costly CPA.
+
+  //  QR and QMR are the shifted versions of Q and QM.
+  //  They are treated as [N-1:r] size signals, and
+  //  discard the r most significant bits of Q and QM.
+  logic [`QLEN-3:0] QR, QMR;
+
+  // shift Q (quotient) and QM (quotient-1)
+  //   if      q =  2   Q = {Q,  10}   QM = {Q,  01}
+  //   else if q =  1   Q = {Q,  01}   QM = {Q,  00}
+  //   else if q =  0   Q = {Q,  00}   QM = {QM, 11}
+  //   else if q = -1   Q = {QM, 11}   QM = {QM, 10}
+  //   else if q = -2   Q = {QM, 10}   QM = {QM, 01}
+
+  assign QR  = Q[`QLEN-3:0];
+  assign QMR = QM[`QLEN-3:0];     // Q and QM with their top two bits dropped, ready to take two new low bits
+  always_comb begin
+    if (q[3]) begin // +2
+      QNext  = {QR,  2'b10};
+      QMNext = {QR,  2'b01};
+    end else if (q[2]) begin // +1
+      QNext  = {QR,  2'b01};
+      QMNext = {QR,  2'b00};
+    end else if (q[1]) begin // -1
+      QNext  = {QMR,  2'b11};
+      QMNext = {QMR,  2'b10};
+    end else if (q[0]) begin // -2
+      QNext  = {QMR,  2'b10};
+      QMNext = {QMR,  2'b01};
+    end else begin           // 0
+      QNext  = {QR,  2'b00};
+      QMNext = {QMR, 2'b11};
+    end 
+  end
+  // Final quotient is in the range [.5, 2)
+
+endmodule
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index e165e7e1..f9ccd255 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -29,7 +29,7 @@
 
 `include "wally-config.vh"
 
-module postprocess(
+module postprocess (
     // general signals
     input logic                             Xs, Ys,  // input signs
     input logic  [`NE-1:0]                  Ze, // input exponents
@@ -48,18 +48,17 @@ module postprocess(
     input logic                             FmaPs,      // the product's sign
     input logic  [`NE+1:0]                  FmaPe,       // Product exponent
     input logic  [3*`NF+5:0]                FmaSm,       // the positive sum
-    input logic                             FmaZmSticky,  // sticky bit that is calculated during alignment
+    input logic                             FmaZmS,  // sticky bit that is calculated during alignment
     input logic                             FmaKillProd,      // set the product to zero before addition if the product is too small to matter
     input logic                             FmaNegSum,    // was the sum negitive
     input logic                             FmaInvA,      // do you invert Z
     input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
     //divide signals
-    input logic  [$clog2(`DIVLEN/2+3)-1:0]  DivEarlyTermShiftDiv2,
-    input logic                             DivSticky,
-    input logic                             DivNegSticky,
+    input logic  [`DURLEN-1:0]              DivEarlyTermShift,
+    input logic                             DivS,
     input logic                             DivDone,
-    input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`DIVLEN+2:0]              Quot,
+    input logic  [`NE+1:0]                  DivQe,
+    input logic  [`QLEN-1-(`RADIX/4):0]                DivQm,
     // conversion signals
     input logic                             CvtCs,     // the result's sign
     input logic  [`NE:0]                    CvtCe,    // the calculated expoent
@@ -69,7 +68,7 @@ module postprocess(
     input logic  [`CVTLEN-1:0]              CvtLzcIn,      // input to the Leading Zero Counter (priority encoder)
     input logic                             IntZero,         // is the input zero
     // final results
-    output logic [`FLEN-1:0]                W,    // FMA final result
+    output logic [`FLEN-1:0]                PostProcRes,    // final postprocessed result (FMA, divide, or convert)
     output logic [4:0]                      PostProcFlg,
     output logic [`XLEN-1:0]                FCvtIntRes    // the int conversion result
     );
@@ -78,32 +77,31 @@ module postprocess(
     logic Ws;
     logic [`NF-1:0] Rf; // Result fraction
     logic [`NE-1:0] Re;  // Result exponent
-    logic Nsgn;
-    logic [`NE+1:0] Nexp;
-    logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
-    logic [`NE+1:0] FullResExp;  // Re with bits to determine sign and overflow
+    logic Ms;
+    logic [`NE+1:0] Me;
+    logic [`CORRSHIFTSZ-1:0] Mf; // correctly shifted fraction
+    logic [`NE+1:0] FullRe;  // Re with bits to determine sign and overflow
     logic S;           // S bit
     logic UfPlus1;                    // do you add one (for determining underflow flag)
     logic R;   // bits needed to determine rounding
-    logic [`FLEN:0] RoundAdd;       // how much to add to the result
     logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt;   // normalization shift count
     logic [`NORMSHIFTSZ-1:0] ShiftIn;        // is the sum zero
     logic [`NORMSHIFTSZ-1:0] Shifted;    // the shifted result
     logic Plus1;      // add one to the final result?
     logic IntInvalid, Overflow, Invalid; // flags
-    logic UfLSBRes;
+    logic UfL;
     logic [`FMTBITS-1:0] OutFmt;
     // fma signals
     logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
-    logic FmaSmZero;        // is the sum zero
+    logic FmaSZero;        // is the sum zero
     logic [3*`NF+8:0] FmaShiftIn;        // shift input
-    logic [`NE+1:0] FmaConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [`NE+1:0] FmaNe;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
     // division singals
     logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
     logic [`NORMSHIFTSZ-1:0] DivShiftIn;
-    logic [`NE+1:0] DivCorrExp;
+    logic [`NE+1:0] Qe;
     logic DivByZero;
     logic DivResDenorm;
     logic [`NE+1:0] DivDenormShift;
@@ -152,9 +150,9 @@ module postprocess(
 
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
-                          .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
+                          .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSel)
@@ -183,9 +181,9 @@ module postprocess(
     
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
-    lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
-                                .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
-                                .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
+    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
+                                .DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
+                                .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -199,19 +197,19 @@ module postprocess(
 
                           
     roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, 
-                          .Xs, .Ys, .CvtCs, .Nsgn);
+                          .Xs, .Ys, .CvtCs, .Ms);
 
-    round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
-                .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
-                .DivSticky, .DivNegSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
+    round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
+                .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
+                .DivS, .DivDone,
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
     ///////////////////////////////////////////////////////////////////////////////
 
     resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
-                          .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
@@ -220,18 +218,18 @@ module postprocess(
     flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                 .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
                 .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
-                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
-                .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+                .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
+                .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Select the result
     ///////////////////////////////////////////////////////////////////////////////
 
     negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
-    resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+    specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
         .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
         .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
         .XInf, .YInf, .DivOp,
-        .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
+        .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
 
 endmodule
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
new file mode 100644
index 00000000..396ca776
--- /dev/null
+++ b/pipelined/src/fpu/qsel.sv
@@ -0,0 +1,135 @@
+///////////////////////////////////////////
+// qsel.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu 
+// Modified:13 January 2022
+//
+// Purpose: Quotient digit selection logic (qsel2, qsel4) for the Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module qsel2 ( // selects a quotient digit of +1, 0, or -1 from ps and pc; *** eventually just change to 4 bits
+  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, // two 4-bit words evaluated as the sum ps+pc (presumably the top bits of a carry-save residual - confirm with caller)
+  output logic         qp, qz//, qn   -- qp: digit is +1, qz: digit is 0; the -1 output (qn) is currently commented out
+);
+ 
+  logic [`DIVLEN+3:`DIVLEN]  p, g; // bitwise propagate (ps^pc) and generate (ps&pc) terms
+  logic          magnitude, sign, cout; // magnitude: digit is nonzero; sign: digit is negative; cout: carry out of the three low bits
+
+  // The quotient selection logic is presented for simplicity, not
+  // for efficiency.  You can probably optimize your logic to
+  // select the proper divisor with less delay.
+
+  // Quotient equations from EE371 lecture notes 13-20
+  assign p = ps ^ pc;
+  assign g = ps & pc;
+
+  assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); // digit is zero only when all three low propagate bits are set
+  assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); // carry out of bits [DIVLEN+2:DIVLEN] of ps+pc with no carry-in
+  assign sign = p[`DIVLEN+3] ^ cout; // top bit of the 4-bit sum ps+pc
+/*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
+			  (ps[52]^pc[52]));
+  assign #1 sign = (ps[55]^pc[55])^
+      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
+			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
+						(ps[52]&pc[52]))))); */
+
+  // Produce quotient = +1, 0, or -1
+  assign qp = magnitude & ~sign;
+  assign qz = ~magnitude;
+//   assign #1 qn = magnitude & sign;
+endmodule
+
+module qsel4 ( // table-based quotient digit selection from the top bits of D and of WS+WC
+	input logic [`DIVLEN+3:0] D, // only D[DIVLEN-1:DIVLEN-3] is read in this module
+	input logic [`DIVLEN+3:0] WS, WC, // only bits [DIVLEN+3:DIVLEN-4] of each are read in this module
+	output logic [3:0] q // code read from QSel4; every table entry has at most one bit set
+);
+	logic [6:0] Wmsbs; // upper 7 bits of the 8-bit truncated sum
+	logic [7:0] PreWmsbs; // 8-bit sum of the top 8 bits of WC and WS
+	logic [2:0] Dmsbs; // 3 bits of D used to pick the threshold row
+	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
+	assign Wmsbs = PreWmsbs[7:1]; // drop the LSB of the truncated sum
+	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
+	// D = 0001.xxx...
+	// Dmsbs = |   |
+  // W =      xxxx.xxx...
+	// Wmsbs = |        |
+
+	logic [3:0] QSel4[1023:0]; // lookup table with 8 rows (d) of 128 entries (w), addressed by {Dmsbs,Wmsbs}
+
+  always_comb begin // fill every table entry by comparing the signed w value against per-d thresholds
+    integer d, w, i, w2;
+    for(d=0; d<8; d++)
+      for(w=0; w<128; w++)begin
+        i = d*128+w; // same layout as the {Dmsbs,Wmsbs} index used for the read below
+        w2 = w-128*(w>=64); // reinterpret the 7-bit w as a two's complement value in [-64, 63]
+        case(d)
+          0: if($signed(w2)>=$signed(12))      QSel4[i] = 4'b1000; // only row written with $signed(); the other rows compare w2 directly (presumably digit +2 - confirm with consumer)
+            else if(w2>=4)   QSel4[i] = 4'b0100; // presumably digit +1 - confirm with consumer
+            else if(w2>=-4)  QSel4[i] = 4'b0000; // no bit set
+            else if(w2>=-13) QSel4[i] = 4'b0010; // presumably digit -1 - confirm with consumer
+            else            QSel4[i] = 4'b0001; // presumably digit -2 - confirm with consumer
+          1: if(w2>=14)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-15) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          2: if(w2>=15)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-16) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          3: if(w2>=16)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-18) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          4: if(w2>=18)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          5: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          6: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-22) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          7: if(w2>=24)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-24) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+        endcase
+      end
+  end
+	assign q = QSel4[{Dmsbs,Wmsbs}]; // Dmsbs selects the 128-entry row, Wmsbs the entry within it
+	
+endmodule
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index 05c3b461..e1ea5e41 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -35,32 +35,29 @@ module resultsign(
     input logic         InfIn,
     input logic         FmaOp,
     input logic [`NE+1:0] FmaSe,
-    input logic         FmaSmZero,
+    input logic         FmaSZero,
     input logic         Mult,
     input logic         R,
     input logic         S,
-    input logic         Nsgn,
+    input logic         Ms,
     output logic        Ws
 );
 
-    logic ZeroSgn;
-    logic InfSgn;
-    logic Underflow;
-    // logic ResultSgnTmp;
+    logic Zeros;
+    logic Infs;
 
     // Determine the sign if the sum is zero
     //      if cancelation then 0 unless round to -infinity
     //      if multiply then Psgn
     //      otherwise psign
-    assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
-    assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
 
 
     // is the result negitive
     //  if p - z is the Sum negitive
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
-    assign InfSgn = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
+    assign Infs = ZInf ? FmaAs : FmaPs;
+    assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 4c185ff3..6132dba4 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -46,36 +46,32 @@ module round(
     input logic  [1:0]              PostProcSel,
     input logic                     CvtResDenormUf,
     input logic                     CvtResUf,
-    input logic  [`CORRSHIFTSZ-1:0] Nfrac,
-    input logic                     FmaZmSticky,  // addend's sticky bit
-    input logic                     ZZero,         // is Z zero
-    input logic                     FmaInvA,          // invert Z
+    input logic  [`CORRSHIFTSZ-1:0] Mf,
+    input logic                     FmaZmS,  // addend's sticky bit
     input logic  [`NE+1:0]          FmaSe,         // exponent of the normalized sum
-    input logic                     Nsgn,      // the result's sign
+    input logic                     Ms,      // the result's sign
     input logic  [`NE:0]            CvtCe,    // the calculated expoent
-    input logic  [`NE+1:0]          DivCorrExp,    // the calculated expoent
-    input logic                     DivSticky,             // sticky bit
-    input logic                     DivNegSticky,
+    input logic  [`NE+1:0]          Qe,    // the calculated expoent
+    input logic                     DivS,             // sticky bit
     output logic                    UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]          FullResExp,      // Re with bits to determine sign and overflow
+    output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
     output logic [`NF-1:0]          Rf,         // Result fraction
     output logic [`NE-1:0]          Re,          // Result exponent
     output logic                    S,             // sticky bit
-    output logic [`NE+1:0]          Nexp,
+    output logic [`NE+1:0]          Me,
     output logic                    Plus1,
-    output logic [`FLEN:0]          RoundAdd,           // how much to add to the result
-    output logic                    R, UfLSBRes // bits needed to calculate rounding
+    output logic                    R, UfL // bits needed to calculate rounding
 );
-    logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
-    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
-    logic           NormSumSticky;  // normalized sum's sticky bit
-    logic           UfSticky;   // sticky bit for underlow calculation
+    logic           L;         // bit used for rounding - least significant bit of the normalized sum
+    logic           UfCalcPlus1; 
+    logic           NormS;  // normalized sum's sticky bit
+    logic           UfS;   // sticky bit for underlow calculation
     logic [`NF-1:0] RoundFrac;
     logic           FpRes, IntRes;
-    logic           UfRound;
+    logic           UfR;
     logic           FpRound, FpLSBRes, FpUfRound;
     logic           CalcPlus1, FpPlus1;
+    logic [`FLEN:0] RoundAdd;           // how much to add to the result
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -118,61 +114,61 @@ module round(
     //      |    NF     |1|1|
     //                     ^    ^ if floating point result
     //                     ^ if not an FMA result
-        if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
     //     2: NF > XLEN
-        if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 2) begin
         // XLEN is either 64 or 32
         // so half and single are always smaller then XLEN
 
         // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
         // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 3) begin
         // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
         // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 4) begin
         // Quad precision will always be greater than XLEN
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
         // 3: NF   > NF1  > XLEN
         // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
 
     end
     
@@ -180,37 +176,37 @@ module round(
 
     // only add the Addend sticky if doing an FMA opperation
     //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
+    assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
     
     // determine round and LSB of the rounded value
     //      - underflow round bit is used to determint the underflow flag
     if (`FPSIZES == 1) begin
-        assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-        assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-        assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+        assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+        assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+        assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
 
     end else if (`FPSIZES == 2) begin
-        assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
-        assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
-        assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
+        assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
+        assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
+        assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
 
     end else if (`FPSIZES == 3) begin
         always_comb
             case (OutFmt)
                 `FMT: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
                 end
                 `FMT1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
                 end
                 `FMT2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
                 end
                 default: begin
                     FpRound = 1'bx;
@@ -222,130 +218,97 @@ module round(
         always_comb
             case (OutFmt)
                 2'h3: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
                 end
                 2'h1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
                 end
                 2'h0: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
                 end
                 2'h2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
                 end
             endcase
     end
 
-    assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
-    assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
-    assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
+    assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
+    assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
+    assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
 
     // used to determine underflow flag
-    assign UfLSBRes = FpRound;
+    assign UfL = FpRound;
     // determine sticky
-    assign S = UfSticky | UfRound;
-
-
-    // Deterimine if a small number was supposed to be subtrated
-    //  - for FMA or if division has a negitive sticky bit
-    assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
-    assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
+    assign S = UfS | UfR;
 
 
     always_comb begin
         // Determine if you add 1
         case (Frm)
-            3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even
+            3'b000: CalcPlus1 = R & (S| L);//round to nearest even
             3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down
-            3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up
-            3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude
+            3'b010: CalcPlus1 = Ms;//round down
+            3'b011: CalcPlus1 = ~Ms;//round up
+            3'b100: CalcPlus1 = R;//round to nearest max magnitude
             default: CalcPlus1 = 1'bx;
         endcase
         // Determine if you add 1 (for underflow flag)
         case (Frm)
-            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
+            3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
             3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down
-            3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up
-            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
+            3'b010: UfCalcPlus1 = Ms;//round down
+            3'b011: UfCalcPlus1 = ~Ms;//round up
+            3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
             default: UfCalcPlus1 = 1'bx;
         endcase
-        // Determine if you subtract 1
-        case (Frm)
-            3'b000: CalcMinus1 = 0;//round to nearest even
-            3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
-            3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
-            3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
-            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
-            default: CalcMinus1 = 1'bx;
-        endcase
    
     end
 
     // If an answer is exact don't round
     assign Plus1 = CalcPlus1 & (S | R);
     assign FpPlus1 = Plus1&~(ToInt&CvtOp);
-    assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
-    assign Minus1 = CalcMinus1 & (S | R);
+    assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
 
     // Compute rounded result
     if (`FPSIZES == 1) begin
-        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
+        assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
 
     end else if (`FPSIZES == 2) begin
         // \/FLEN+1
         //  | NE+2 |        NF      |
         //  '-NE+2-^----NF1----^
         // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-        assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
-                                   Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+        assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
 
     end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (OutFmt)
-                `FMT:  RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
-                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
-                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
-                default: RoundAdd = (`FLEN+1)'(0);
-            endcase
-        end
+        assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
 
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (OutFmt)
-                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
-                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
-                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
-                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
-            endcase
-        end
-
-    end
+    end else if (`FPSIZES == 4)      
+        assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
 
     // determine the result to be roundned
-    assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+    assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
     
     always_comb
         case(PostProcSel)
-            2'b10: Nexp = FmaSe; // fma
-            2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
-            2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
-            default: Nexp = '0; 
+            2'b10: Me = FmaSe; // fma
+            2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
+            2'b01: Me = DivDone ? Qe : '0; // divide
+            default: Me = '0; 
         endcase
 
     // round the result
     //      - if the fraction overflows one should be added to the exponent
-    assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
-    assign Re = FullResExp[`NE-1:0];
+    assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+    assign Re = FullRe[`NE-1:0];
 
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv
index 22686b24..55e322bc 100644
--- a/pipelined/src/fpu/roundsign.sv
+++ b/pipelined/src/fpu/roundsign.sv
@@ -38,11 +38,11 @@ module roundsign(
     input logic         DivOp,
     input logic         CvtOp,
     input logic         CvtCs,
-    output logic        Nsgn
+    output logic        Ms
 );
 
     logic FmaResSgnTmp;
-    logic DivSgn;
+    logic Qs;
 
     // is the result negitive
     //  if p - z is the Sum negitive
@@ -52,9 +52,9 @@ module roundsign(
 
     // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
 
-    assign DivSgn = Xs^Ys;
+    assign Qs = Xs^Ys;
 
     // Sign for rounding calulation
-    assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
+    assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
similarity index 72%
rename from pipelined/src/fpu/lzacorrection.sv
rename to pipelined/src/fpu/shiftcorrection.sv
index 03b36f4f..71a2393a 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -28,23 +28,22 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 
-module lzacorrection(
+module shiftcorrection(
     input logic  [`NORMSHIFTSZ-1:0] Shifted,         // the shifted sum before LZA correction
     input logic                     FmaOp,
     input logic                     DivOp,
     input logic                     DivResDenorm,
-    input logic  [`NE+1:0]          DivCalcExp,
+    input logic  [`NE+1:0]          DivQe,
     input logic  [`NE+1:0]          DivDenormShift,
-    input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic  [`NE+1:0]          FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
-    input logic                     FmaKillProd,  // is the product set to zero
-    input logic                     FmaSmZero,
-    output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
-    output logic [`NE+1:0]          DivCorrExp,
+    input logic                     FmaSZero,
+    output logic [`CORRSHIFTSZ-1:0] Mf,         // the shifted sum before LZA correction
+    output logic [`NE+1:0]          Qe,
     output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
-    logic [`CORRSHIFTSZ:0] CorrQuotShifted;
+    logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
     logic                  ResDenorm;    // is the result denormalized
     logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
@@ -54,16 +53,16 @@ module lzacorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted =  {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
+    assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
+    assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
     // the quotent is in the range [.5,2) if there is no early termination
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
+    assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/specialcase.sv
similarity index 97%
rename from pipelined/src/fpu/resultselect.sv
rename to pipelined/src/fpu/specialcase.sv
index 4389056f..3c28eae2 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/specialcase.sv
@@ -29,17 +29,17 @@
 
 `include "wally-config.vh"
 
-module resultselect(
+module specialcase(
     input logic                 Xs,        // input signs
     input logic  [`NF:0]        Xm, Ym, Zm, // input mantissas
     input logic                 XNaN, YNaN, ZNaN,    // inputs are NaN
     input logic  [2:0]          Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic  [`FMTBITS-1:0] OutFmt,       // output format
     input logic                 InfIn,
+    input logic                 NaNIn,
     input logic                 XInf, YInf,
     input logic                 XZero,
     input logic                 IntZero,
-    input logic                 NaNIn,
     input logic                 IntToFp,
     input logic                 Int64,
     input logic                 Signed,
@@ -53,10 +53,10 @@ module resultselect(
     input logic                 IntInvalid, Invalid, Overflow,  // flags
     input logic                 CvtResUf,
     input logic  [`NE-1:0]      Re,          // Res exponent
-    input logic  [`NE+1:0]      FullResExp,          // Res exponent
+    input logic  [`NE+1:0]      FullRe,          // Res exponent
     input logic  [`NF-1:0]      Rf,         // Res fraction
     input logic  [`XLEN+1:0]    CvtNegRes,     // the negation of the result
-    output logic [`FLEN-1:0]    W,     // final res
+    output logic [`FLEN-1:0]    PostProcRes,     // final res
     output logic [`XLEN-1:0]    FCvtIntRes     // final res
 );
     logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
@@ -231,11 +231,11 @@ module resultselect(
     //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
     //      - dont set to zero if fp input is zero but not using the fp input
     //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
+    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
     assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
     // output infinity with result sign if divide by zero
     if(`IEEE754) begin
-        assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
+        assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
                          YNaN&~CvtOp ? YNaNRes :
                          ZNaN&FmaOp ? ZNaNRes :
                          Invalid ? InvalidRes : 
@@ -243,7 +243,7 @@ module resultselect(
                          KillRes ? UfRes :  
                          NormRes;
     end else begin
-        assign W = NaNIn|Invalid ? InvalidRes :
+        assign PostProcRes = NaNIn|Invalid ? InvalidRes :
                          SelOfRes ? OfRes :
                          KillRes ? UfRes :  
                          NormRes;
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
deleted file mode 100644
index 741d4e83..00000000
--- a/pipelined/src/fpu/srt-radix4.sv
+++ /dev/null
@@ -1,312 +0,0 @@
-///////////////////////////////////////////
-// srt.sv
-//
-// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
-// Modified:13 January 2022
-//
-// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module srtradix4 (
-  input  logic clk,
-  input  logic DivStart, 
-  input  logic DivBusy, 
-  input logic  [`FMTBITS-1:0] FmtE,
-  input  logic [`NE-1:0] XExpE, YExpE,
-  input  logic XZeroE, YZeroE, 
-  input logic [`DIVLEN-1:0] X,
-  input logic [`DIVLEN-1:0] Dpreproc,
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [`DIVLEN+2:0] Quot,
-  output logic [`DIVLEN+3:0]  WSN, WCN,
-  output logic [`DIVLEN+3:0]  WS, WC,
-  output logic  [`NE+1:0] DivCalcExpM,
-  output logic [`XLEN-1:0] Rem
-);
-
-  logic [3:0]     q;
-  logic [`DIVLEN+3:0]  WSA;
-  logic [`DIVLEN+3:0]  WCA;
-  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
-  logic [`NE+1:0] DivCalcExp;
-  logic [$clog2(`XLEN+1)-1:0] intExp;
-  logic           intSign;
-
-  // Top Muxes and Registers
-  // When start is asserted, the inputs are loaded into the divider.
-  // Otherwise, the divisor is retained and the partial remainder
-  // is fed back for the next iteration.
-  //  - when the start signal is asserted X and 0 are loaded into WS and WC
-  //  - otherwise load WSA into the flipflop
-  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
-  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
-  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
-  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
-  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
-  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
-
-  // Quotient Selection logic
-  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  // *** change this for radix 4 - generate w/ stine code
-  // q encoding:
-	// 1000 = +2
-	// 0100 = +1
-	// 0000 =  0
-	// 0010 = -1
-	// 0001 = -2
-  qsel4 qsel4(.D, .WS, .WC, .q);
-
-  // Divisor Selection logic
-  // *** radix 4 change to choose -2 to 2
-  // - choose the negitive version of what's being selected
-  assign DBar = ~D;
-  assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
-  assign D2 = {D[`DIVLEN+2:0], 1'b0};
-
-  always_comb
-    case (q)
-      4'b1000: Dsel = DBar2;
-      4'b0100: Dsel = DBar;
-      4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
-      4'b0010: Dsel = D;
-      4'b0001: Dsel = D2;
-      default: Dsel = {`DIVLEN+4{1'bx}};
-    endcase
-
-  // Partial Product Generation
-  //  WSA, WCA = WS + WC - qD
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
-  
-  //*** change for radix 4
-  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
-
-  expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
-
-endmodule
-
-////////////////
-// Submodules //
-////////////////
-
-
-
-module qsel4 (
-	input logic [`DIVLEN+3:0] D,
-	input logic [`DIVLEN+3:0] WS, WC,
-	output logic [3:0] q
-);
-	logic [6:0] Wmsbs;
-	logic [7:0] PreWmsbs;
-	logic [2:0] Dmsbs;
-	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
-	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
-	// D = 0001.xxx...
-	// Dmsbs = |   |
-  // W =      xxxx.xxx...
-	// Wmsbs = |        |
-
-	logic [3:0] QSel4[1023:0];
-
-  initial begin 
-    integer d, w, i, w2;
-    for(d=0; d<8; d++)
-      for(w=0; w<128; w++)begin
-        i = d*128+w;
-        w2 = w-128*(w>=64); // convert to two's complement
-        case(d)
-          0: if($signed(w2)>=$signed(12))      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-4)  QSel4[i] = 4'b0000; 
-            else if(w2>=-13) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          1: if(w2>=14)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-15) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          2: if(w2>=15)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-16) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          3: if(w2>=16)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-18) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          4: if(w2>=18)      QSel4[i] = 4'b1000;
-            else if(w2>=6)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          5: if(w2>=20)      QSel4[i] = 4'b1000;
-            else if(w2>=6)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          6: if(w2>=20)      QSel4[i] = 4'b1000;
-            else if(w2>=8)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-22) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          7: if(w2>=24)      QSel4[i] = 4'b1000;
-            else if(w2>=8)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-24) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-        endcase
-      end
-  end
-	assign q = QSel4[{Dmsbs,Wmsbs}];
-	
-endmodule
-
-///////////////////////////////////
-// On-The-Fly Converter, Radix 2 //
-///////////////////////////////////
-module otfc4 (
-  input  logic         clk,
-  input  logic         DivStart,
-  input  logic         DivBusy,
-  input  logic [3:0]   q,
-  output logic [`DIVLEN+2:0] Quot
-);
-
-  //  The on-the-fly converter transfers the quotient 
-  //  bits to the quotient as they come. 
-  //
-  //  This code follows the psuedocode presented in the 
-  //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-4 division.
-  //
-  //  QM is Q-1. It allows us to write negative bits 
-  //  without using a costly CPA. 
-  logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
-  //  QR and QMR are the shifted versions of Q and QM.
-  //  They are treated as [N-1:r] size signals, and 
-  //  discard the r most significant bits of Q and QM. 
-  logic [`DIVLEN:0] QR, QMR;
-  // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
-  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
-  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
-
-  // shift Q (quotent) and QM (quotent-1)
-		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
-		// else if 	q = 1   Q = {Q, 01} 	QM = {Q, 00}	
-		// else if 	q = 0   Q = {Q, 00} 	QM = {QM, 11}	
-		// else if 	q = -1	Q = {QM, 11} 	QM = {QM, 10}
-		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
-    // *** how does the 0 concatination numbers work?
-
-  always_comb begin
-    QR  = Quot[`DIVLEN:0];
-    QMR = QM[`DIVLEN:0];     // Shift Q and QM
-    if (q[3]) begin // +2
-      QNext  = {QR,  2'b10};
-      QMNext = {QR,  2'b01};
-    end else if (q[2]) begin // +1
-      QNext  = {QR,  2'b01};
-      QMNext = {QR,  2'b00};
-    end else if (q[1]) begin // -1
-      QNext  = {QMR,  2'b11};
-      QMNext = {QMR,  2'b10};
-    end else if (q[0]) begin // -2
-      QNext  = {QMR,  2'b10};
-      QMNext = {QMR,  2'b01};
-    end else begin           // 0
-      QNext  = {QR,  2'b00};
-      QMNext = {QMR, 2'b11};
-    end 
-  end
-  // Final Quoteint is in the range [.5, 2)
-
-endmodule
-
-
-
-/////////
-// csa //
-/////////
-module csa #(parameter N=69) (
-  input  logic [N-1:0] in1, in2, in3, 
-  input  logic         cin, 
-  output logic [N-1:0] out1, out2
-);
-
-  // This block adds in1, in2, in3, and cin to produce 
-  // a result out1 / out2 in carry-save redundant form.
-  // cin is just added to the least significant bit and
-  // is Startuired to handle adding a negative divisor.
-  // Fortunately, the carry (out2) is shifted left by one
-  // bit, leaving room in the least significant bit to 
-  // insert cin.
-
-  assign out1 = in1 ^ in2 ^ in3;
-  assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
-		    (in2[N-2:0] & in3[N-2:0]), cin};
-endmodule
-
-module expcalc(
-  input logic  [`FMTBITS-1:0] FmtE,
-  input  logic [`NE-1:0] XExpE, YExpE,
-  input logic XZeroE, 
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic  [`NE+1:0] DivCalcExp
-  );
-    logic [`NE-2:0] Bias;
-    
-    if (`FPSIZES == 1) begin
-        assign Bias = (`NE-1)'(`BIAS); 
-
-    end else if (`FPSIZES == 2) begin
-        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (FmtE)
-                `FMT: Bias  =  (`NE-1)'(`BIAS);
-                `FMT1: Bias = (`NE-1)'(`BIAS1);
-                `FMT2: Bias = (`NE-1)'(`BIAS2);
-                default: Bias = 'x;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (FmtE)
-                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
-                2'h1: Bias =  (`NE-1)'(`D_BIAS);
-                2'h0: Bias =  (`NE-1)'(`S_BIAS);
-                2'h2: Bias =  (`NE-1)'(`H_BIAS);
-            endcase
-    end
-    // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
-    endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
new file mode 100644
index 00000000..9e031511
--- /dev/null
+++ b/pipelined/src/fpu/srt.sv
@@ -0,0 +1,259 @@
+///////////////////////////////////////////
+// srt.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu 
+// Modified:13 January 2022
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module srt( // SRT divide recurrence: residual registers, `DIVCOPIES chained iterations per cycle, quotient registers
+  input  logic clk,
+  input  logic DivStart,  // loads X, the divisor and the exponent, and resets Q
+  input  logic DivBusy,   // enables the residual and quotient registers while iterating
+  input logic  [`FMTBITS-1:0] FmtE, // format select, forwarded to expcalc to choose the bias
+  input  logic [`NE-1:0] Xe, Ye,   // input exponents, forwarded to expcalc
+  input  logic XZeroE, YZeroE,     // inputs are zero (YZeroE is not referenced in this module)
+  input logic [`DIVLEN-1:0] X,        // value loaded into WS[0] on DivStart
+  input logic [`DIVLEN-1:0] Dpreproc, // divisor bits; registered into D below a 4'b0001 prefix
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, // normalization shift counts, forwarded to expcalc
+  input logic NegSticky,              // selects QM instead of Q for the Quot output
+  output logic [`QLEN-1-(`RADIX/4):0] Quot,       // selected quotient register (top bit dropped for radix 4)
+  output logic [`DIVLEN+3:0]  NextWSN, NextWCN,  // last stage's WSA/WCA shifted left by log2(RADIX)
+  output logic [`DIVLEN+3:0]  StickyWSA,         // radix 2 only: shifted WSA (left undriven for radix 4)
+  output logic [`DIVLEN+3:0]  FirstWS, FirstWC,  // registered residual, WS[0] and WC[0]
+  output logic  [`NE+1:0] DivCalcExpM,           // DivCalcExp captured on DivStart
+  output logic [`XLEN-1:0] Rem                   // not driven anywhere in this module
+);
+
+  // per-stage signals: index 0 is fed by the registers, index i+1 by stage i
+ /* verilator lint_off UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSA[`DIVCOPIES-1:0];      // csa sum output of each stage
+  logic [`DIVLEN+3:0]  WCA[`DIVCOPIES-1:0];      // csa carry output of each stage
+  logic [`DIVLEN+3:0]  WS[`DIVCOPIES-1:0];       // residual sum input of each stage
+  logic [`DIVLEN+3:0]  WC[`DIVCOPIES-1:0];       // residual carry input of each stage
+  logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];           // quotient input of each stage
+  logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];          // quotient-minus-one input of each stage
+  logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];       // quotient output of each stage
+  logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];      // quotient-minus-one output of each stage
+ /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSN, WCN;                 // residual register inputs (start value or feedback)
+  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;       // divisor, ~D, 2D, ~(2D); D2/DBar2 only driven for radix 4
+  logic [`NE+1:0] DivCalcExp;
+  logic [$clog2(`XLEN+1)-1:0] intExp;            // currently unused
+  logic           intSign;                       // currently unused
+  logic [`QLEN-1:0] QMMux;
+
+  // Top Muxes and Registers
+  // When start is asserted, the inputs are loaded into the divider.
+  // Otherwise, the divisor is retained and the partial remainder
+  // is fed back for the next iteration.
+  //  - when the start signal is asserted X and 0 are loaded into WS and WC
+  //  - otherwise load the shifted WSA/WCA of the last stage into the flipflops
+  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
+  //  - ~XZeroE is used as the assumed one of X to avoid creating a sticky bit - all other numbers are normalized
+  if (`RADIX == 2) begin : nextw
+    assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
+    assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
+  end else begin
+    assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+    assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  end
+
+  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  flopen   #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
+  mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  flopen   #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
+  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
+  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
+
+
+  // Divisor Selections
+  // - choose the negative version of what's being selected (the csa carry-in supplies the +1)
+  assign DBar = ~D;
+  if(`RADIX == 4) begin : d2
+    assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
+    assign D2 = {D[`DIVLEN+2:0], 1'b0};
+  end
+
+  genvar i;
+  generate
+    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
+      divinteration divinteration(.D, .DBar, .D2, .DBar2, 
+      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
+      if(i<(`DIVCOPIES-1)) begin 
+        if (`RADIX==2)begin 
+          assign WS[i+1] = {WSA[i][`DIVLEN+2:0], 1'b0}; // shift by 1: keep [DIVLEN+2:0] so the result is DIVLEN+4 bits, as in NextWSN
+          assign WC[i+1] = {WCA[i][`DIVLEN+2:0], 1'b0}; // same shift for the carry word, as in NextWCN
+        end else begin
+          assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
+          assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
+        end
+        assign Q[i+1] = QNext[i];
+        assign QM[i+1] = QMNext[i];
+      end
+    end
+  endgenerate
+
+  // if starting a new division set Q to 0 and QM to -1
+  mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
+  flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
+  flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); // NOTE(review): enabled by DivBusy only, unlike wsflop/wcflop - confirm the all-ones value is captured on DivStart
+
+  assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
+  assign FirstWS = WS[0];
+  assign FirstWC = WC[0];
+  if(`RADIX==2)
+    if (`DIVCOPIES == 1)
+      assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0};
+    else
+      assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
+
+  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
+
+endmodule
+
+////////////////
+// Submodules //
+////////////////
+
+ /* verilator lint_off UNOPTFLAT */
+module divinteration ( // one divide iteration: select a quotient digit, update the carry-save residual, extend Q/QM
+  input logic [`DIVLEN+3:0] D, // divisor
+  input logic [`DIVLEN+3:0]  DBar, D2, DBar2, // variants of D supplied by the parent; D2/DBar2 are only selected for radix 4
+  input logic [`QLEN-1:0] Q, QM, // quotient and quotient-minus-one entering this iteration
+  input logic [`DIVLEN+3:0]  WS, WC, // residual entering this iteration, in carry-save form
+  output logic [`QLEN-1:0] QNext, QMNext, // quotient and quotient-minus-one after this iteration
+  output logic [`DIVLEN+3:0]  WSA, WCA // residual after this iteration, in carry-save form (not yet shifted)
+);
+ /* verilator lint_on UNOPTFLAT */
+
+  logic [`DIVLEN+3:0]  Dsel; // divisor multiple fed to the csa
+  logic [3:0]     q; // radix-4 quotient digit, encoded as listed below
+  logic qp, qz;//, qn;
+
+  // Quotient Selection logic
+  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
+  // q encoding:
+	// 1000 = +2
+	// 0100 = +1
+	// 0000 =  0
+	// 0010 = -1
+	// 0001 = -2
+  if(`RADIX == 2) begin : qsel
+    qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn);
+  end else begin
+    qsel4 qsel4(.D, .WS, .WC, .q);
+  end
+
+  if(`RADIX == 2) begin : dsel
+    assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); // zero when qz, DBar when qp, otherwise D
+  end else begin
+    always_comb
+      case (q)
+        4'b1000: Dsel = DBar2;
+        4'b0100: Dsel = DBar;
+        4'b0000: Dsel = '0;
+        4'b0010: Dsel = D;
+        4'b0001: Dsel = D2;
+        default: Dsel = 'x; // any other encoding is not a valid digit
+      endcase
+  end
+  // Partial Product Generation
+  //  WSA, WCA = WS + WC - qD
+  if (`RADIX == 2) begin : csa
+    csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); // carry-in is 1 exactly when the inverted divisor DBar is selected
+  end else begin
+    csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); // carry-in is 1 for +1 and +2, where DBar/DBar2 is selected
+  end
+
+  if (`RADIX == 2) begin : otfc
+    otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
+  end else begin
+    otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
+  end
+
+endmodule
+
+
+/////////
+// csa //
+/////////
+module csa #(parameter N=69) ( // N-bit 3:2 carry-save adder with a carry-in
+  input  logic [N-1:0] in1, in2, in3, 
+  input  logic         cin, 
+  output logic [N-1:0] out1, out2
+);
+
+  // This block adds in1, in2, in3, and cin to produce 
+  // a result out1 / out2 in carry-save redundant form.
+  // cin is just added to the least significant bit and
+  // is required to handle adding a negative divisor.
+  // Fortunately, the carry (out2) is shifted left by one
+  // bit, leaving room in the least significant bit to 
+  // insert cin.
+
+  assign out1 = in1 ^ in2 ^ in3; // bitwise sum without carries
+  assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
+		    (in2[N-2:0] & in3[N-2:0]), cin}; // majority of bits [N-2:0] moved up one place; bit N-1 generates no carry
+endmodule
+
+module expcalc( // exponent calculation for the divider
+  input logic  [`FMTBITS-1:0] FmtE, // format select: chooses which bias is added
+  input  logic [`NE-1:0] Xe, Ye, // input exponents
+  input logic XZeroE, // X is zero: forces DivCalcExp to 0
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, // normalization shift counts of the inputs
+  output logic  [`NE+1:0] DivCalcExp // (Xe - XZeroCnt - Ye + YZeroCnt + Bias), masked by ~XZeroE
+  );
+    logic [`NE-2:0] Bias; // bias of the selected format, cast to NE-1 bits
+    
+    if (`FPSIZES == 1) begin
+        assign Bias = (`NE-1)'(`BIAS); 
+
+    end else if (`FPSIZES == 2) begin
+        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (FmtE)
+                `FMT: Bias  =  (`NE-1)'(`BIAS);
+                `FMT1: Bias = (`NE-1)'(`BIAS1);
+                `FMT2: Bias = (`NE-1)'(`BIAS2);
+                default: Bias = 'x; // FmtE matches none of the three configured formats
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (FmtE) // all four 2-bit encodings are listed, so no default is given
+                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
+                2'h1: Bias =  (`NE-1)'(`D_BIAS);
+                2'h0: Bias =  (`NE-1)'(`S_BIAS);
+                2'h2: Bias =  (`NE-1)'(`H_BIAS);
+            endcase
+    end
+    // correct exponent for denormalized input's normalization shifts
+    assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 008b234d..634ecc1d 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -33,37 +33,45 @@
 module srtfsm(
   input  logic clk, 
   input  logic reset, 
-  input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
+  input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
   input  logic XInfE, YInfE, 
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
   input  logic DivStart, 
-  input logic StallE,
-  input logic StallM,
-  input  logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
+  input  logic StallE,
+  input  logic StallM,
+  input  logic [`DIVLEN+3:0] StickyWSA,
+  input  logic [`DURLEN-1:0] Dur,
+  output logic [`DURLEN-1:0] EarlyTermShiftE,
   output logic DivStickyE,
   output logic DivDone,
-  output logic DivNegStickyE,
+  output logic NegSticky,
   output logic DivBusy
   );
   
   typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
   statetype state;
 
-  logic [$clog2(`DIVLEN/2+3)-1:0] step;
+  logic [`DURLEN-1:0] step;
   logic WZero;
   //logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
   logic [`DIVLEN+3:0] W;
 
   //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
   assign DivBusy = (state == BUSY);
-  assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
-  assign DivStickyE = ~WZero;
+  assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
+  // calculate sticky bit
+  //    - there is a chance that a value is subtracted infinitely, resulting in an exact QM result
+  //      this is only a problem on radix 2 (and possibly maximally redundant radix 4) since minimally redundant
+  //      radix-4 division can't create a QM that continually adds 0's
+  if (`RADIX == 2)
+    assign DivStickyE = |W&~(StickyWSA == WS);
+  else
+    assign DivStickyE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
-  assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
-  assign EarlyTermShiftDiv2E = step;
+  assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
+  assign EarlyTermShiftE = step;
 
   always_ff @(posedge clk) begin
       if (reset) begin
@@ -73,7 +81,7 @@ module srtfsm(
           if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
           else         state <= #1 BUSY;
       end else if (state == BUSY) begin
-          if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
+          if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
               state <= #1 DONE;
           end
           step <= step - 1;
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index d17d2abd..b9fb8bb8 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -31,11 +31,11 @@
 `include "wally-config.vh"
 
 module srtpreproc (
-  input  logic [`NF:0] XManE, YManE,
+  input  logic [`NF:0] Xm, Ym,
   output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
+  output logic [`DURLEN-1:0] Dur
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
@@ -49,24 +49,33 @@ module srtpreproc (
 
   // ***can probably merge X LZC with conversion
   // cout the number of leading zeros
-  lzc #(`NF+1) lzcA (XManE, XZeroCnt);
-  lzc #(`NF+1) lzcB (YManE, YZeroCnt);
+  lzc #(`NF+1) lzcA (Xm, XZeroCnt);
+  lzc #(`NF+1) lzcB (Ym, YZeroCnt);
 
   // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
   // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
 
   
   assign X = PreprocX;
   assign Dpreproc = PreprocY;
-
-  assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
+  assign Dur = (`DURLEN)'(`FPDUR);
   // assign intExp = zeroCntB - zeroCntA + 1;
   // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
+  //           radix 2     radix 4
+  // 1 copies  DIVLEN+2    DIVLEN+2/2
+  // 2 copies  DIVLEN+2/2  DIVLEN+2/2*2
+  // 4 copies  DIVLEN+2/4  DIVLEN+2/2*4
+  // 8 copies  DIVLEN+2/8  DIVLEN+2/2*8
+
+  // DIVRESLEN = DIVLEN or DIVLEN+2
+  // r = 1 or 2
+  // DIVRESLEN/(r*`DIVCOPIES)
+
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv
index 9f6e5981..71aabbc6 100644
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
 /* verilator lint_off CMPCONST */
 /* verilator lint_off WIDTH */
     
-    int i;
+    logic [31:0] i;
     always_comb begin
         i = 0;
         while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1;  // search for leading one
diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv
index 0c43c736..5c2f799d 100644
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@@ -226,7 +226,7 @@ module ifu (
       icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
              .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
              .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), 
-             .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
+             .CacheFetchLine(ICacheFetchLine), .FStore2(),
              .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
              .Cacheable(CacheableF),
              .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index 4b200f70..e9f41e65 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -58,7 +58,7 @@ module lsu (
    input logic              sfencevmaM,
    // fpu
    input logic [`FLEN-1:0]  FWriteDataM,
-   input logic              FLoad2,
+   input logic              FStore2,
    input logic              FpLoadStoreM,
    // faults
    output logic             LoadPageFaultM, StoreAmoPageFaultM,
@@ -192,7 +192,8 @@ module lsu (
   //  Memory System
   //  Either Data Cache or Data Tightly Integrated Memory or just bus interface
   /////////////////////////////////////////////////////////////////////////////////////////////
-  logic [`XLEN-1:0]    AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
+  logic [`XLEN-1:0]    AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
+  logic [`LLEN-1:0]    FinalWriteDataM;
   logic [`LLEN-1:0]    ReadDataWordM, LittleEndianReadDataWordM;
   logic [`LLEN-1:0]    ReadDataWordMuxM;
   logic                IgnoreRequest;
@@ -202,7 +203,7 @@ module lsu (
   if (`DMEM == `MEM_TIM) begin : dtim
     // *** directly instantiate RAM or ROM here.  Instantiate SRAM1P1RW.  
     // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
-    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, 
+    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
               .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
               .DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
               .DCacheMiss, .DCacheAccess);
@@ -230,15 +231,19 @@ module lsu (
 
     mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
       .s(SelUncachedAdr), .y(ReadDataWordMuxM));
-    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
+    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
       .s(SelUncachedAdr), .y(LSUBusHWDATA));
     
     if(CACHE_ENABLED) begin : dcache
+      if (`LLEN>`XLEN)
+        mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
+      else
+        assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
       cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
               .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
         .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
         .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), 
-        .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
+        .ByteMask(ByteMaskM), .WordCount, .FStore2,
         .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
         .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
         .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), 
@@ -286,10 +291,10 @@ module lsu (
   //  swap the bytes when read from big-endian memory
   /////////////////////////////////////////////////////////////////////////////////////////////
   if (`BIGENDIAN_SUPPORTED) begin:endian
-    bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
+    bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
     bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
   end else begin
-    assign FinalWriteDataM = LittleEndianWriteDataM;
+    assign IEUWriteDataM = LittleEndianWriteDataM;
     assign LittleEndianReadDataWordM = ReadDataWordM;
   end
 
diff --git a/pipelined/src/uncore/plic_apb.sv b/pipelined/src/uncore/plic_apb.sv
index f83033c4..51e94d7f 100644
--- a/pipelined/src/uncore/plic_apb.sv
+++ b/pipelined/src/uncore/plic_apb.sv
@@ -172,8 +172,8 @@ module plic_apb (
   end
 
   // pending interrupt requests
-  //assign nextIntPending = (intPending | requests) & ~intInProgress; // 
-  assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
+  assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots.  Confirmed to boot successfully.
+  //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
   flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
 
   // context-dependent signals
diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv
index 7538a541..372f4aba 100644
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@@ -93,7 +93,7 @@ module wallypipelinedcore (
   logic             FStallD;
   logic             FWriteIntE;
   logic [`XLEN-1:0]         FWriteDataE;
-  logic                     FLoad2;
+  logic                     FStore2;
   logic [`FLEN-1:0]         FWriteDataM;
   logic [`XLEN-1:0]         FIntResM;  
   logic [`XLEN-1:0]         FCvtIntResW;  
@@ -259,7 +259,7 @@ module wallypipelinedcore (
   .CommittedM, .DCacheMiss, .DCacheAccess,
   .SquashSCW,            
   .FpLoadStoreM,
-  .FWriteDataM, .FLoad2,
+  .FWriteDataM, .FStore2,
   //.DataMisalignedM(DataMisalignedM),
   .IEUAdrE, .IEUAdrM, .WriteDataE,
   .ReadDataW, .FlushDCacheM,
@@ -400,7 +400,7 @@ module wallypipelinedcore (
          .STATUS_FS, // is floating-point enabled?
          .FRegWriteM, // FP register write enable
          .FpLoadStoreM,
-         .FLoad2,
+         .FStore2,
          .FStallD, // Stall the decode stage
          .FWriteIntE, // integer register write enable
          .FWriteDataE, // Data to be written to memory
diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c
index 61fe74aa..d6bebb77 100644
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp,
   // Print r in standard double format
   fprintf(fptr, "%03x", rExp|(rSign<<11));
   printhex(fptr, rFrac);
+  fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
 }
 
diff --git a/pipelined/srt/lint-srt b/pipelined/srt/lint-srt
index fd42df88..399201be 100755
--- a/pipelined/srt/lint-srt
+++ b/pipelined/srt/lint-srt
@@ -1,2 +1 @@
 verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
-verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c
deleted file mode 100644
index 8e68f998..00000000
--- a/pipelined/srt/qslc_r4a2.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  printf("\tcase({D[5:3],Wmsbs})\n");
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      printf("\t\t10'b");
-      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
-      printf("_");
-      disp_binary((double) pla.tot, TOT_SIZE, 0);
-      printf(": q = 4'b");
-
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 12)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -4)
-	  printf("0000");
-	else if ((pla.tot) >= -13)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 1:
-	if ((pla.tot) >= 14)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -15)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 2:
-	if ((pla.tot) >= 15)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -16)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 3:
-	if ((pla.tot) >= 16)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -18)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 4:
-	if ((pla.tot) >= 18)
-	  printf("1000");
-	else if ((pla.tot) >= 6)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -20)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 5:
-	if ((pla.tot) >= 20)
-	  printf("1000");
-	else if ((pla.tot) >= 6)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -20)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 6:
-	if ((pla.tot) >= 20)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -22)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 7:
-	if ((pla.tot) >= 24)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -24)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      default: printf ("XXX");
-			
-      }
-			
-      printf(";\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  printf("\tendcase\n");
-  
-}
diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b
deleted file mode 100755
index f719bbf4..00000000
Binary files a/pipelined/srt/qslc_r4a2b and /dev/null differ
diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c
deleted file mode 100644
index 94a3a4cd..00000000
--- a/pipelined/srt/qslc_r4a2b.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 12)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -4)
-	  printf("0");
-	else if ((pla.tot) >= -13)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 1:
-	if ((pla.tot) >= 14)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -15)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 2:
-	if ((pla.tot) >= 15)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -16)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 3:
-	if ((pla.tot) >= 16)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -18)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 4:
-	if ((pla.tot) >= 18)
-	  printf("8");
-	else if ((pla.tot) >= 6)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -20)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 5:
-	if ((pla.tot) >= 20)
-	  printf("8");
-	else if ((pla.tot) >= 6)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -20)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 6:
-	if ((pla.tot) >= 20)
-	  printf("8");
-	else if ((pla.tot) >= 8)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -22)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 7:
-	if ((pla.tot) >= 24)
-	  printf("8");
-	else if ((pla.tot) >= 8)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -24)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      default: printf ("X");
-			
-      }
-			
-      printf("\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  
-}
diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv
deleted file mode 100644
index b92d81e8..00000000
--- a/pipelined/srt/qslc_r4a2b.tv
+++ /dev/null
@@ -1,1024 +0,0 @@
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2
deleted file mode 100755
index 5cff70cd..00000000
Binary files a/pipelined/srt/qslc_sqrt_r4a2 and /dev/null differ
diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c
deleted file mode 100644
index 252293cc..00000000
--- a/pipelined/srt/qslc_sqrt_r4a2.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  printf("\tcase({D[5:3],Wmsbs})\n");
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      printf("\t\t11'b");
-      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
-      printf("_");
-      disp_binary((double) pla.tot, TOT_SIZE, 0);
-      printf(": q = 4'b");
-
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 24)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -26)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 1:
-	if ((pla.tot) >= 28)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -10)
-	  printf("0000");
-	else if ((pla.tot) >= -28)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 2:
-	if ((pla.tot) >= 32)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -32)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 3:
-	if ((pla.tot) >= 32)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -34)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 4:
-	if ((pla.tot) >= 36)
-	  printf("1000");
-	else if ((pla.tot) >= 12)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -36)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 5:
-	if ((pla.tot) >= 40)
-	  printf("1000");
-	else if ((pla.tot) >= 12)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -40)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 6:
-	if ((pla.tot) >= 40)
-	  printf("1000");
-	else if ((pla.tot) >= 16)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -44)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 7:
-	if ((pla.tot) >= 44)
-	  printf("1000");
-	else if ((pla.tot) >= 16)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -46)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      default: printf ("XXX");
-			
-      }
-			
-      printf(";\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  printf("\tendcase\n");
-  
-}
diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv
deleted file mode 100644
index 805dbbae..00000000
--- a/pipelined/srt/qslc_sqrt_r4a2.sv
+++ /dev/null
@@ -1,1026 +0,0 @@
-	case({D[5:3],Wmsbs})
-		11'b000_0000000: q = 4'b0000;
-		11'b000_0000001: q = 4'b0000;
-		11'b000_0000010: q = 4'b0000;
-		11'b000_0000011: q = 4'b0000;
-		11'b000_0000100: q = 4'b0000;
-		11'b000_0000101: q = 4'b0000;
-		11'b000_0000110: q = 4'b0000;
-		11'b000_0000111: q = 4'b0000;
-		11'b000_0001000: q = 4'b0100;
-		11'b000_0001001: q = 4'b0100;
-		11'b000_0001010: q = 4'b0100;
-		11'b000_0001011: q = 4'b0100;
-		11'b000_0001100: q = 4'b0100;
-		11'b000_0001101: q = 4'b0100;
-		11'b000_0001110: q = 4'b0100;
-		11'b000_0001111: q = 4'b0100;
-		11'b000_0010000: q = 4'b0100;
-		11'b000_0010001: q = 4'b0100;
-		11'b000_0010010: q = 4'b0100;
-		11'b000_0010011: q = 4'b0100;
-		11'b000_0010100: q = 4'b0100;
-		11'b000_0010101: q = 4'b0100;
-		11'b000_0010110: q = 4'b0100;
-		11'b000_0010111: q = 4'b0100;
-		11'b000_0011000: q = 4'b1000;
-		11'b000_0011001: q = 4'b1000;
-		11'b000_0011010: q = 4'b1000;
-		11'b000_0011011: q = 4'b1000;
-		11'b000_0011100: q = 4'b1000;
-		11'b000_0011101: q = 4'b1000;
-		11'b000_0011110: q = 4'b1000;
-		11'b000_0011111: q = 4'b1000;
-		11'b000_0100000: q = 4'b1000;
-		11'b000_0100001: q = 4'b1000;
-		11'b000_0100010: q = 4'b1000;
-		11'b000_0100011: q = 4'b1000;
-		11'b000_0100100: q = 4'b1000;
-		11'b000_0100101: q = 4'b1000;
-		11'b000_0100110: q = 4'b1000;
-		11'b000_0100111: q = 4'b1000;
-		11'b000_0101000: q = 4'b1000;
-		11'b000_0101001: q = 4'b1000;
-		11'b000_0101010: q = 4'b1000;
-		11'b000_0101011: q = 4'b1000;
-		11'b000_0101100: q = 4'b1000;
-		11'b000_0101101: q = 4'b1000;
-		11'b000_0101110: q = 4'b1000;
-		11'b000_0101111: q = 4'b1000;
-		11'b000_0110000: q = 4'b1000;
-		11'b000_0110001: q = 4'b1000;
-		11'b000_0110010: q = 4'b1000;
-		11'b000_0110011: q = 4'b1000;
-		11'b000_0110100: q = 4'b1000;
-		11'b000_0110101: q = 4'b1000;
-		11'b000_0110110: q = 4'b1000;
-		11'b000_0110111: q = 4'b1000;
-		11'b000_0111000: q = 4'b1000;
-		11'b000_0111001: q = 4'b1000;
-		11'b000_0111010: q = 4'b1000;
-		11'b000_0111011: q = 4'b1000;
-		11'b000_0111100: q = 4'b1000;
-		11'b000_0111101: q = 4'b1000;
-		11'b000_0111110: q = 4'b1000;
-		11'b000_0111111: q = 4'b1000;
-		11'b000_1000000: q = 4'b0001;
-		11'b000_1000001: q = 4'b0001;
-		11'b000_1000010: q = 4'b0001;
-		11'b000_1000011: q = 4'b0001;
-		11'b000_1000100: q = 4'b0001;
-		11'b000_1000101: q = 4'b0001;
-		11'b000_1000110: q = 4'b0001;
-		11'b000_1000111: q = 4'b0001;
-		11'b000_1001000: q = 4'b0001;
-		11'b000_1001001: q = 4'b0001;
-		11'b000_1001010: q = 4'b0001;
-		11'b000_1001011: q = 4'b0001;
-		11'b000_1001100: q = 4'b0001;
-		11'b000_1001101: q = 4'b0001;
-		11'b000_1001110: q = 4'b0001;
-		11'b000_1001111: q = 4'b0001;
-		11'b000_1010000: q = 4'b0001;
-		11'b000_1010001: q = 4'b0001;
-		11'b000_1010010: q = 4'b0001;
-		11'b000_1010011: q = 4'b0001;
-		11'b000_1010100: q = 4'b0001;
-		11'b000_1010101: q = 4'b0001;
-		11'b000_1010110: q = 4'b0001;
-		11'b000_1010111: q = 4'b0001;
-		11'b000_1011000: q = 4'b0001;
-		11'b000_1011001: q = 4'b0001;
-		11'b000_1011010: q = 4'b0001;
-		11'b000_1011011: q = 4'b0001;
-		11'b000_1011100: q = 4'b0001;
-		11'b000_1011101: q = 4'b0001;
-		11'b000_1011110: q = 4'b0001;
-		11'b000_1011111: q = 4'b0001;
-		11'b000_1100000: q = 4'b0001;
-		11'b000_1100001: q = 4'b0001;
-		11'b000_1100010: q = 4'b0001;
-		11'b000_1100011: q = 4'b0001;
-		11'b000_1100100: q = 4'b0001;
-		11'b000_1100101: q = 4'b0001;
-		11'b000_1100110: q = 4'b0010;
-		11'b000_1100111: q = 4'b0010;
-		11'b000_1101000: q = 4'b0010;
-		11'b000_1101001: q = 4'b0010;
-		11'b000_1101010: q = 4'b0010;
-		11'b000_1101011: q = 4'b0010;
-		11'b000_1101100: q = 4'b0010;
-		11'b000_1101101: q = 4'b0010;
-		11'b000_1101110: q = 4'b0010;
-		11'b000_1101111: q = 4'b0010;
-		11'b000_1110000: q = 4'b0010;
-		11'b000_1110001: q = 4'b0010;
-		11'b000_1110010: q = 4'b0010;
-		11'b000_1110011: q = 4'b0010;
-		11'b000_1110100: q = 4'b0010;
-		11'b000_1110101: q = 4'b0010;
-		11'b000_1110110: q = 4'b0010;
-		11'b000_1110111: q = 4'b0010;
-		11'b000_1111000: q = 4'b0000;
-		11'b000_1111001: q = 4'b0000;
-		11'b000_1111010: q = 4'b0000;
-		11'b000_1111011: q = 4'b0000;
-		11'b000_1111100: q = 4'b0000;
-		11'b000_1111101: q = 4'b0000;
-		11'b000_1111110: q = 4'b0000;
-		11'b000_1111111: q = 4'b0000;
-		11'b001_0000000: q = 4'b0000;
-		11'b001_0000001: q = 4'b0000;
-		11'b001_0000010: q = 4'b0000;
-		11'b001_0000011: q = 4'b0000;
-		11'b001_0000100: q = 4'b0000;
-		11'b001_0000101: q = 4'b0000;
-		11'b001_0000110: q = 4'b0000;
-		11'b001_0000111: q = 4'b0000;
-		11'b001_0001000: q = 4'b0100;
-		11'b001_0001001: q = 4'b0100;
-		11'b001_0001010: q = 4'b0100;
-		11'b001_0001011: q = 4'b0100;
-		11'b001_0001100: q = 4'b0100;
-		11'b001_0001101: q = 4'b0100;
-		11'b001_0001110: q = 4'b0100;
-		11'b001_0001111: q = 4'b0100;
-		11'b001_0010000: q = 4'b0100;
-		11'b001_0010001: q = 4'b0100;
-		11'b001_0010010: q = 4'b0100;
-		11'b001_0010011: q = 4'b0100;
-		11'b001_0010100: q = 4'b0100;
-		11'b001_0010101: q = 4'b0100;
-		11'b001_0010110: q = 4'b0100;
-		11'b001_0010111: q = 4'b0100;
-		11'b001_0011000: q = 4'b0100;
-		11'b001_0011001: q = 4'b0100;
-		11'b001_0011010: q = 4'b0100;
-		11'b001_0011011: q = 4'b0100;
-		11'b001_0011100: q = 4'b1000;
-		11'b001_0011101: q = 4'b1000;
-		11'b001_0011110: q = 4'b1000;
-		11'b001_0011111: q = 4'b1000;
-		11'b001_0100000: q = 4'b1000;
-		11'b001_0100001: q = 4'b1000;
-		11'b001_0100010: q = 4'b1000;
-		11'b001_0100011: q = 4'b1000;
-		11'b001_0100100: q = 4'b1000;
-		11'b001_0100101: q = 4'b1000;
-		11'b001_0100110: q = 4'b1000;
-		11'b001_0100111: q = 4'b1000;
-		11'b001_0101000: q = 4'b1000;
-		11'b001_0101001: q = 4'b1000;
-		11'b001_0101010: q = 4'b1000;
-		11'b001_0101011: q = 4'b1000;
-		11'b001_0101100: q = 4'b1000;
-		11'b001_0101101: q = 4'b1000;
-		11'b001_0101110: q = 4'b1000;
-		11'b001_0101111: q = 4'b1000;
-		11'b001_0110000: q = 4'b1000;
-		11'b001_0110001: q = 4'b1000;
-		11'b001_0110010: q = 4'b1000;
-		11'b001_0110011: q = 4'b1000;
-		11'b001_0110100: q = 4'b1000;
-		11'b001_0110101: q = 4'b1000;
-		11'b001_0110110: q = 4'b1000;
-		11'b001_0110111: q = 4'b1000;
-		11'b001_0111000: q = 4'b1000;
-		11'b001_0111001: q = 4'b1000;
-		11'b001_0111010: q = 4'b1000;
-		11'b001_0111011: q = 4'b1000;
-		11'b001_0111100: q = 4'b1000;
-		11'b001_0111101: q = 4'b1000;
-		11'b001_0111110: q = 4'b1000;
-		11'b001_0111111: q = 4'b1000;
-		11'b001_1000000: q = 4'b0001;
-		11'b001_1000001: q = 4'b0001;
-		11'b001_1000010: q = 4'b0001;
-		11'b001_1000011: q = 4'b0001;
-		11'b001_1000100: q = 4'b0001;
-		11'b001_1000101: q = 4'b0001;
-		11'b001_1000110: q = 4'b0001;
-		11'b001_1000111: q = 4'b0001;
-		11'b001_1001000: q = 4'b0001;
-		11'b001_1001001: q = 4'b0001;
-		11'b001_1001010: q = 4'b0001;
-		11'b001_1001011: q = 4'b0001;
-		11'b001_1001100: q = 4'b0001;
-		11'b001_1001101: q = 4'b0001;
-		11'b001_1001110: q = 4'b0001;
-		11'b001_1001111: q = 4'b0001;
-		11'b001_1010000: q = 4'b0001;
-		11'b001_1010001: q = 4'b0001;
-		11'b001_1010010: q = 4'b0001;
-		11'b001_1010011: q = 4'b0001;
-		11'b001_1010100: q = 4'b0001;
-		11'b001_1010101: q = 4'b0001;
-		11'b001_1010110: q = 4'b0001;
-		11'b001_1010111: q = 4'b0001;
-		11'b001_1011000: q = 4'b0001;
-		11'b001_1011001: q = 4'b0001;
-		11'b001_1011010: q = 4'b0001;
-		11'b001_1011011: q = 4'b0001;
-		11'b001_1011100: q = 4'b0001;
-		11'b001_1011101: q = 4'b0001;
-		11'b001_1011110: q = 4'b0001;
-		11'b001_1011111: q = 4'b0001;
-		11'b001_1100000: q = 4'b0001;
-		11'b001_1100001: q = 4'b0001;
-		11'b001_1100010: q = 4'b0001;
-		11'b001_1100011: q = 4'b0001;
-		11'b001_1100100: q = 4'b0010;
-		11'b001_1100101: q = 4'b0010;
-		11'b001_1100110: q = 4'b0010;
-		11'b001_1100111: q = 4'b0010;
-		11'b001_1101000: q = 4'b0010;
-		11'b001_1101001: q = 4'b0010;
-		11'b001_1101010: q = 4'b0010;
-		11'b001_1101011: q = 4'b0010;
-		11'b001_1101100: q = 4'b0010;
-		11'b001_1101101: q = 4'b0010;
-		11'b001_1101110: q = 4'b0010;
-		11'b001_1101111: q = 4'b0010;
-		11'b001_1110000: q = 4'b0010;
-		11'b001_1110001: q = 4'b0010;
-		11'b001_1110010: q = 4'b0010;
-		11'b001_1110011: q = 4'b0010;
-		11'b001_1110100: q = 4'b0010;
-		11'b001_1110101: q = 4'b0010;
-		11'b001_1110110: q = 4'b0000;
-		11'b001_1110111: q = 4'b0000;
-		11'b001_1111000: q = 4'b0000;
-		11'b001_1111001: q = 4'b0000;
-		11'b001_1111010: q = 4'b0000;
-		11'b001_1111011: q = 4'b0000;
-		11'b001_1111100: q = 4'b0000;
-		11'b001_1111101: q = 4'b0000;
-		11'b001_1111110: q = 4'b0000;
-		11'b001_1111111: q = 4'b0000;
-		11'b010_0000000: q = 4'b0000;
-		11'b010_0000001: q = 4'b0000;
-		11'b010_0000010: q = 4'b0000;
-		11'b010_0000011: q = 4'b0000;
-		11'b010_0000100: q = 4'b0000;
-		11'b010_0000101: q = 4'b0000;
-		11'b010_0000110: q = 4'b0000;
-		11'b010_0000111: q = 4'b0000;
-		11'b010_0001000: q = 4'b0100;
-		11'b010_0001001: q = 4'b0100;
-		11'b010_0001010: q = 4'b0100;
-		11'b010_0001011: q = 4'b0100;
-		11'b010_0001100: q = 4'b0100;
-		11'b010_0001101: q = 4'b0100;
-		11'b010_0001110: q = 4'b0100;
-		11'b010_0001111: q = 4'b0100;
-		11'b010_0010000: q = 4'b0100;
-		11'b010_0010001: q = 4'b0100;
-		11'b010_0010010: q = 4'b0100;
-		11'b010_0010011: q = 4'b0100;
-		11'b010_0010100: q = 4'b0100;
-		11'b010_0010101: q = 4'b0100;
-		11'b010_0010110: q = 4'b0100;
-		11'b010_0010111: q = 4'b0100;
-		11'b010_0011000: q = 4'b0100;
-		11'b010_0011001: q = 4'b0100;
-		11'b010_0011010: q = 4'b0100;
-		11'b010_0011011: q = 4'b0100;
-		11'b010_0011100: q = 4'b0100;
-		11'b010_0011101: q = 4'b0100;
-		11'b010_0011110: q = 4'b0100;
-		11'b010_0011111: q = 4'b0100;
-		11'b010_0100000: q = 4'b1000;
-		11'b010_0100001: q = 4'b1000;
-		11'b010_0100010: q = 4'b1000;
-		11'b010_0100011: q = 4'b1000;
-		11'b010_0100100: q = 4'b1000;
-		11'b010_0100101: q = 4'b1000;
-		11'b010_0100110: q = 4'b1000;
-		11'b010_0100111: q = 4'b1000;
-		11'b010_0101000: q = 4'b1000;
-		11'b010_0101001: q = 4'b1000;
-		11'b010_0101010: q = 4'b1000;
-		11'b010_0101011: q = 4'b1000;
-		11'b010_0101100: q = 4'b1000;
-		11'b010_0101101: q = 4'b1000;
-		11'b010_0101110: q = 4'b1000;
-		11'b010_0101111: q = 4'b1000;
-		11'b010_0110000: q = 4'b1000;
-		11'b010_0110001: q = 4'b1000;
-		11'b010_0110010: q = 4'b1000;
-		11'b010_0110011: q = 4'b1000;
-		11'b010_0110100: q = 4'b1000;
-		11'b010_0110101: q = 4'b1000;
-		11'b010_0110110: q = 4'b1000;
-		11'b010_0110111: q = 4'b1000;
-		11'b010_0111000: q = 4'b1000;
-		11'b010_0111001: q = 4'b1000;
-		11'b010_0111010: q = 4'b1000;
-		11'b010_0111011: q = 4'b1000;
-		11'b010_0111100: q = 4'b1000;
-		11'b010_0111101: q = 4'b1000;
-		11'b010_0111110: q = 4'b1000;
-		11'b010_0111111: q = 4'b1000;
-		11'b010_1000000: q = 4'b0001;
-		11'b010_1000001: q = 4'b0001;
-		11'b010_1000010: q = 4'b0001;
-		11'b010_1000011: q = 4'b0001;
-		11'b010_1000100: q = 4'b0001;
-		11'b010_1000101: q = 4'b0001;
-		11'b010_1000110: q = 4'b0001;
-		11'b010_1000111: q = 4'b0001;
-		11'b010_1001000: q = 4'b0001;
-		11'b010_1001001: q = 4'b0001;
-		11'b010_1001010: q = 4'b0001;
-		11'b010_1001011: q = 4'b0001;
-		11'b010_1001100: q = 4'b0001;
-		11'b010_1001101: q = 4'b0001;
-		11'b010_1001110: q = 4'b0001;
-		11'b010_1001111: q = 4'b0001;
-		11'b010_1010000: q = 4'b0001;
-		11'b010_1010001: q = 4'b0001;
-		11'b010_1010010: q = 4'b0001;
-		11'b010_1010011: q = 4'b0001;
-		11'b010_1010100: q = 4'b0001;
-		11'b010_1010101: q = 4'b0001;
-		11'b010_1010110: q = 4'b0001;
-		11'b010_1010111: q = 4'b0001;
-		11'b010_1011000: q = 4'b0001;
-		11'b010_1011001: q = 4'b0001;
-		11'b010_1011010: q = 4'b0001;
-		11'b010_1011011: q = 4'b0001;
-		11'b010_1011100: q = 4'b0001;
-		11'b010_1011101: q = 4'b0001;
-		11'b010_1011110: q = 4'b0001;
-		11'b010_1011111: q = 4'b0001;
-		11'b010_1100000: q = 4'b0010;
-		11'b010_1100001: q = 4'b0010;
-		11'b010_1100010: q = 4'b0010;
-		11'b010_1100011: q = 4'b0010;
-		11'b010_1100100: q = 4'b0010;
-		11'b010_1100101: q = 4'b0010;
-		11'b010_1100110: q = 4'b0010;
-		11'b010_1100111: q = 4'b0010;
-		11'b010_1101000: q = 4'b0010;
-		11'b010_1101001: q = 4'b0010;
-		11'b010_1101010: q = 4'b0010;
-		11'b010_1101011: q = 4'b0010;
-		11'b010_1101100: q = 4'b0010;
-		11'b010_1101101: q = 4'b0010;
-		11'b010_1101110: q = 4'b0010;
-		11'b010_1101111: q = 4'b0010;
-		11'b010_1110000: q = 4'b0010;
-		11'b010_1110001: q = 4'b0010;
-		11'b010_1110010: q = 4'b0010;
-		11'b010_1110011: q = 4'b0010;
-		11'b010_1110100: q = 4'b0000;
-		11'b010_1110101: q = 4'b0000;
-		11'b010_1110110: q = 4'b0000;
-		11'b010_1110111: q = 4'b0000;
-		11'b010_1111000: q = 4'b0000;
-		11'b010_1111001: q = 4'b0000;
-		11'b010_1111010: q = 4'b0000;
-		11'b010_1111011: q = 4'b0000;
-		11'b010_1111100: q = 4'b0000;
-		11'b010_1111101: q = 4'b0000;
-		11'b010_1111110: q = 4'b0000;
-		11'b010_1111111: q = 4'b0000;
-		11'b011_0000000: q = 4'b0000;
-		11'b011_0000001: q = 4'b0000;
-		11'b011_0000010: q = 4'b0000;
-		11'b011_0000011: q = 4'b0000;
-		11'b011_0000100: q = 4'b0000;
-		11'b011_0000101: q = 4'b0000;
-		11'b011_0000110: q = 4'b0000;
-		11'b011_0000111: q = 4'b0000;
-		11'b011_0001000: q = 4'b0100;
-		11'b011_0001001: q = 4'b0100;
-		11'b011_0001010: q = 4'b0100;
-		11'b011_0001011: q = 4'b0100;
-		11'b011_0001100: q = 4'b0100;
-		11'b011_0001101: q = 4'b0100;
-		11'b011_0001110: q = 4'b0100;
-		11'b011_0001111: q = 4'b0100;
-		11'b011_0010000: q = 4'b0100;
-		11'b011_0010001: q = 4'b0100;
-		11'b011_0010010: q = 4'b0100;
-		11'b011_0010011: q = 4'b0100;
-		11'b011_0010100: q = 4'b0100;
-		11'b011_0010101: q = 4'b0100;
-		11'b011_0010110: q = 4'b0100;
-		11'b011_0010111: q = 4'b0100;
-		11'b011_0011000: q = 4'b0100;
-		11'b011_0011001: q = 4'b0100;
-		11'b011_0011010: q = 4'b0100;
-		11'b011_0011011: q = 4'b0100;
-		11'b011_0011100: q = 4'b0100;
-		11'b011_0011101: q = 4'b0100;
-		11'b011_0011110: q = 4'b0100;
-		11'b011_0011111: q = 4'b0100;
-		11'b011_0100000: q = 4'b1000;
-		11'b011_0100001: q = 4'b1000;
-		11'b011_0100010: q = 4'b1000;
-		11'b011_0100011: q = 4'b1000;
-		11'b011_0100100: q = 4'b1000;
-		11'b011_0100101: q = 4'b1000;
-		11'b011_0100110: q = 4'b1000;
-		11'b011_0100111: q = 4'b1000;
-		11'b011_0101000: q = 4'b1000;
-		11'b011_0101001: q = 4'b1000;
-		11'b011_0101010: q = 4'b1000;
-		11'b011_0101011: q = 4'b1000;
-		11'b011_0101100: q = 4'b1000;
-		11'b011_0101101: q = 4'b1000;
-		11'b011_0101110: q = 4'b1000;
-		11'b011_0101111: q = 4'b1000;
-		11'b011_0110000: q = 4'b1000;
-		11'b011_0110001: q = 4'b1000;
-		11'b011_0110010: q = 4'b1000;
-		11'b011_0110011: q = 4'b1000;
-		11'b011_0110100: q = 4'b1000;
-		11'b011_0110101: q = 4'b1000;
-		11'b011_0110110: q = 4'b1000;
-		11'b011_0110111: q = 4'b1000;
-		11'b011_0111000: q = 4'b1000;
-		11'b011_0111001: q = 4'b1000;
-		11'b011_0111010: q = 4'b1000;
-		11'b011_0111011: q = 4'b1000;
-		11'b011_0111100: q = 4'b1000;
-		11'b011_0111101: q = 4'b1000;
-		11'b011_0111110: q = 4'b1000;
-		11'b011_0111111: q = 4'b1000;
-		11'b011_1000000: q = 4'b0001;
-		11'b011_1000001: q = 4'b0001;
-		11'b011_1000010: q = 4'b0001;
-		11'b011_1000011: q = 4'b0001;
-		11'b011_1000100: q = 4'b0001;
-		11'b011_1000101: q = 4'b0001;
-		11'b011_1000110: q = 4'b0001;
-		11'b011_1000111: q = 4'b0001;
-		11'b011_1001000: q = 4'b0001;
-		11'b011_1001001: q = 4'b0001;
-		11'b011_1001010: q = 4'b0001;
-		11'b011_1001011: q = 4'b0001;
-		11'b011_1001100: q = 4'b0001;
-		11'b011_1001101: q = 4'b0001;
-		11'b011_1001110: q = 4'b0001;
-		11'b011_1001111: q = 4'b0001;
-		11'b011_1010000: q = 4'b0001;
-		11'b011_1010001: q = 4'b0001;
-		11'b011_1010010: q = 4'b0001;
-		11'b011_1010011: q = 4'b0001;
-		11'b011_1010100: q = 4'b0001;
-		11'b011_1010101: q = 4'b0001;
-		11'b011_1010110: q = 4'b0001;
-		11'b011_1010111: q = 4'b0001;
-		11'b011_1011000: q = 4'b0001;
-		11'b011_1011001: q = 4'b0001;
-		11'b011_1011010: q = 4'b0001;
-		11'b011_1011011: q = 4'b0001;
-		11'b011_1011100: q = 4'b0001;
-		11'b011_1011101: q = 4'b0001;
-		11'b011_1011110: q = 4'b0010;
-		11'b011_1011111: q = 4'b0010;
-		11'b011_1100000: q = 4'b0010;
-		11'b011_1100001: q = 4'b0010;
-		11'b011_1100010: q = 4'b0010;
-		11'b011_1100011: q = 4'b0010;
-		11'b011_1100100: q = 4'b0010;
-		11'b011_1100101: q = 4'b0010;
-		11'b011_1100110: q = 4'b0010;
-		11'b011_1100111: q = 4'b0010;
-		11'b011_1101000: q = 4'b0010;
-		11'b011_1101001: q = 4'b0010;
-		11'b011_1101010: q = 4'b0010;
-		11'b011_1101011: q = 4'b0010;
-		11'b011_1101100: q = 4'b0010;
-		11'b011_1101101: q = 4'b0010;
-		11'b011_1101110: q = 4'b0010;
-		11'b011_1101111: q = 4'b0010;
-		11'b011_1110000: q = 4'b0010;
-		11'b011_1110001: q = 4'b0010;
-		11'b011_1110010: q = 4'b0010;
-		11'b011_1110011: q = 4'b0010;
-		11'b011_1110100: q = 4'b0000;
-		11'b011_1110101: q = 4'b0000;
-		11'b011_1110110: q = 4'b0000;
-		11'b011_1110111: q = 4'b0000;
-		11'b011_1111000: q = 4'b0000;
-		11'b011_1111001: q = 4'b0000;
-		11'b011_1111010: q = 4'b0000;
-		11'b011_1111011: q = 4'b0000;
-		11'b011_1111100: q = 4'b0000;
-		11'b011_1111101: q = 4'b0000;
-		11'b011_1111110: q = 4'b0000;
-		11'b011_1111111: q = 4'b0000;
-		11'b100_0000000: q = 4'b0000;
-		11'b100_0000001: q = 4'b0000;
-		11'b100_0000010: q = 4'b0000;
-		11'b100_0000011: q = 4'b0000;
-		11'b100_0000100: q = 4'b0000;
-		11'b100_0000101: q = 4'b0000;
-		11'b100_0000110: q = 4'b0000;
-		11'b100_0000111: q = 4'b0000;
-		11'b100_0001000: q = 4'b0000;
-		11'b100_0001001: q = 4'b0000;
-		11'b100_0001010: q = 4'b0000;
-		11'b100_0001011: q = 4'b0000;
-		11'b100_0001100: q = 4'b0100;
-		11'b100_0001101: q = 4'b0100;
-		11'b100_0001110: q = 4'b0100;
-		11'b100_0001111: q = 4'b0100;
-		11'b100_0010000: q = 4'b0100;
-		11'b100_0010001: q = 4'b0100;
-		11'b100_0010010: q = 4'b0100;
-		11'b100_0010011: q = 4'b0100;
-		11'b100_0010100: q = 4'b0100;
-		11'b100_0010101: q = 4'b0100;
-		11'b100_0010110: q = 4'b0100;
-		11'b100_0010111: q = 4'b0100;
-		11'b100_0011000: q = 4'b0100;
-		11'b100_0011001: q = 4'b0100;
-		11'b100_0011010: q = 4'b0100;
-		11'b100_0011011: q = 4'b0100;
-		11'b100_0011100: q = 4'b0100;
-		11'b100_0011101: q = 4'b0100;
-		11'b100_0011110: q = 4'b0100;
-		11'b100_0011111: q = 4'b0100;
-		11'b100_0100000: q = 4'b0100;
-		11'b100_0100001: q = 4'b0100;
-		11'b100_0100010: q = 4'b0100;
-		11'b100_0100011: q = 4'b0100;
-		11'b100_0100100: q = 4'b1000;
-		11'b100_0100101: q = 4'b1000;
-		11'b100_0100110: q = 4'b1000;
-		11'b100_0100111: q = 4'b1000;
-		11'b100_0101000: q = 4'b1000;
-		11'b100_0101001: q = 4'b1000;
-		11'b100_0101010: q = 4'b1000;
-		11'b100_0101011: q = 4'b1000;
-		11'b100_0101100: q = 4'b1000;
-		11'b100_0101101: q = 4'b1000;
-		11'b100_0101110: q = 4'b1000;
-		11'b100_0101111: q = 4'b1000;
-		11'b100_0110000: q = 4'b1000;
-		11'b100_0110001: q = 4'b1000;
-		11'b100_0110010: q = 4'b1000;
-		11'b100_0110011: q = 4'b1000;
-		11'b100_0110100: q = 4'b1000;
-		11'b100_0110101: q = 4'b1000;
-		11'b100_0110110: q = 4'b1000;
-		11'b100_0110111: q = 4'b1000;
-		11'b100_0111000: q = 4'b1000;
-		11'b100_0111001: q = 4'b1000;
-		11'b100_0111010: q = 4'b1000;
-		11'b100_0111011: q = 4'b1000;
-		11'b100_0111100: q = 4'b1000;
-		11'b100_0111101: q = 4'b1000;
-		11'b100_0111110: q = 4'b1000;
-		11'b100_0111111: q = 4'b1000;
-		11'b100_1000000: q = 4'b0001;
-		11'b100_1000001: q = 4'b0001;
-		11'b100_1000010: q = 4'b0001;
-		11'b100_1000011: q = 4'b0001;
-		11'b100_1000100: q = 4'b0001;
-		11'b100_1000101: q = 4'b0001;
-		11'b100_1000110: q = 4'b0001;
-		11'b100_1000111: q = 4'b0001;
-		11'b100_1001000: q = 4'b0001;
-		11'b100_1001001: q = 4'b0001;
-		11'b100_1001010: q = 4'b0001;
-		11'b100_1001011: q = 4'b0001;
-		11'b100_1001100: q = 4'b0001;
-		11'b100_1001101: q = 4'b0001;
-		11'b100_1001110: q = 4'b0001;
-		11'b100_1001111: q = 4'b0001;
-		11'b100_1010000: q = 4'b0001;
-		11'b100_1010001: q = 4'b0001;
-		11'b100_1010010: q = 4'b0001;
-		11'b100_1010011: q = 4'b0001;
-		11'b100_1010100: q = 4'b0001;
-		11'b100_1010101: q = 4'b0001;
-		11'b100_1010110: q = 4'b0001;
-		11'b100_1010111: q = 4'b0001;
-		11'b100_1011000: q = 4'b0001;
-		11'b100_1011001: q = 4'b0001;
-		11'b100_1011010: q = 4'b0001;
-		11'b100_1011011: q = 4'b0001;
-		11'b100_1011100: q = 4'b0010;
-		11'b100_1011101: q = 4'b0010;
-		11'b100_1011110: q = 4'b0010;
-		11'b100_1011111: q = 4'b0010;
-		11'b100_1100000: q = 4'b0010;
-		11'b100_1100001: q = 4'b0010;
-		11'b100_1100010: q = 4'b0010;
-		11'b100_1100011: q = 4'b0010;
-		11'b100_1100100: q = 4'b0010;
-		11'b100_1100101: q = 4'b0010;
-		11'b100_1100110: q = 4'b0010;
-		11'b100_1100111: q = 4'b0010;
-		11'b100_1101000: q = 4'b0010;
-		11'b100_1101001: q = 4'b0010;
-		11'b100_1101010: q = 4'b0010;
-		11'b100_1101011: q = 4'b0010;
-		11'b100_1101100: q = 4'b0010;
-		11'b100_1101101: q = 4'b0010;
-		11'b100_1101110: q = 4'b0010;
-		11'b100_1101111: q = 4'b0010;
-		11'b100_1110000: q = 4'b0010;
-		11'b100_1110001: q = 4'b0010;
-		11'b100_1110010: q = 4'b0010;
-		11'b100_1110011: q = 4'b0010;
-		11'b100_1110100: q = 4'b0000;
-		11'b100_1110101: q = 4'b0000;
-		11'b100_1110110: q = 4'b0000;
-		11'b100_1110111: q = 4'b0000;
-		11'b100_1111000: q = 4'b0000;
-		11'b100_1111001: q = 4'b0000;
-		11'b100_1111010: q = 4'b0000;
-		11'b100_1111011: q = 4'b0000;
-		11'b100_1111100: q = 4'b0000;
-		11'b100_1111101: q = 4'b0000;
-		11'b100_1111110: q = 4'b0000;
-		11'b100_1111111: q = 4'b0000;
-		11'b101_0000000: q = 4'b0000;
-		11'b101_0000001: q = 4'b0000;
-		11'b101_0000010: q = 4'b0000;
-		11'b101_0000011: q = 4'b0000;
-		11'b101_0000100: q = 4'b0000;
-		11'b101_0000101: q = 4'b0000;
-		11'b101_0000110: q = 4'b0000;
-		11'b101_0000111: q = 4'b0000;
-		11'b101_0001000: q = 4'b0000;
-		11'b101_0001001: q = 4'b0000;
-		11'b101_0001010: q = 4'b0000;
-		11'b101_0001011: q = 4'b0000;
-		11'b101_0001100: q = 4'b0100;
-		11'b101_0001101: q = 4'b0100;
-		11'b101_0001110: q = 4'b0100;
-		11'b101_0001111: q = 4'b0100;
-		11'b101_0010000: q = 4'b0100;
-		11'b101_0010001: q = 4'b0100;
-		11'b101_0010010: q = 4'b0100;
-		11'b101_0010011: q = 4'b0100;
-		11'b101_0010100: q = 4'b0100;
-		11'b101_0010101: q = 4'b0100;
-		11'b101_0010110: q = 4'b0100;
-		11'b101_0010111: q = 4'b0100;
-		11'b101_0011000: q = 4'b0100;
-		11'b101_0011001: q = 4'b0100;
-		11'b101_0011010: q = 4'b0100;
-		11'b101_0011011: q = 4'b0100;
-		11'b101_0011100: q = 4'b0100;
-		11'b101_0011101: q = 4'b0100;
-		11'b101_0011110: q = 4'b0100;
-		11'b101_0011111: q = 4'b0100;
-		11'b101_0100000: q = 4'b0100;
-		11'b101_0100001: q = 4'b0100;
-		11'b101_0100010: q = 4'b0100;
-		11'b101_0100011: q = 4'b0100;
-		11'b101_0100100: q = 4'b0100;
-		11'b101_0100101: q = 4'b0100;
-		11'b101_0100110: q = 4'b0100;
-		11'b101_0100111: q = 4'b0100;
-		11'b101_0101000: q = 4'b1000;
-		11'b101_0101001: q = 4'b1000;
-		11'b101_0101010: q = 4'b1000;
-		11'b101_0101011: q = 4'b1000;
-		11'b101_0101100: q = 4'b1000;
-		11'b101_0101101: q = 4'b1000;
-		11'b101_0101110: q = 4'b1000;
-		11'b101_0101111: q = 4'b1000;
-		11'b101_0110000: q = 4'b1000;
-		11'b101_0110001: q = 4'b1000;
-		11'b101_0110010: q = 4'b1000;
-		11'b101_0110011: q = 4'b1000;
-		11'b101_0110100: q = 4'b1000;
-		11'b101_0110101: q = 4'b1000;
-		11'b101_0110110: q = 4'b1000;
-		11'b101_0110111: q = 4'b1000;
-		11'b101_0111000: q = 4'b1000;
-		11'b101_0111001: q = 4'b1000;
-		11'b101_0111010: q = 4'b1000;
-		11'b101_0111011: q = 4'b1000;
-		11'b101_0111100: q = 4'b1000;
-		11'b101_0111101: q = 4'b1000;
-		11'b101_0111110: q = 4'b1000;
-		11'b101_0111111: q = 4'b1000;
-		11'b101_1000000: q = 4'b0001;
-		11'b101_1000001: q = 4'b0001;
-		11'b101_1000010: q = 4'b0001;
-		11'b101_1000011: q = 4'b0001;
-		11'b101_1000100: q = 4'b0001;
-		11'b101_1000101: q = 4'b0001;
-		11'b101_1000110: q = 4'b0001;
-		11'b101_1000111: q = 4'b0001;
-		11'b101_1001000: q = 4'b0001;
-		11'b101_1001001: q = 4'b0001;
-		11'b101_1001010: q = 4'b0001;
-		11'b101_1001011: q = 4'b0001;
-		11'b101_1001100: q = 4'b0001;
-		11'b101_1001101: q = 4'b0001;
-		11'b101_1001110: q = 4'b0001;
-		11'b101_1001111: q = 4'b0001;
-		11'b101_1010000: q = 4'b0001;
-		11'b101_1010001: q = 4'b0001;
-		11'b101_1010010: q = 4'b0001;
-		11'b101_1010011: q = 4'b0001;
-		11'b101_1010100: q = 4'b0001;
-		11'b101_1010101: q = 4'b0001;
-		11'b101_1010110: q = 4'b0001;
-		11'b101_1010111: q = 4'b0001;
-		11'b101_1011000: q = 4'b0010;
-		11'b101_1011001: q = 4'b0010;
-		11'b101_1011010: q = 4'b0010;
-		11'b101_1011011: q = 4'b0010;
-		11'b101_1011100: q = 4'b0010;
-		11'b101_1011101: q = 4'b0010;
-		11'b101_1011110: q = 4'b0010;
-		11'b101_1011111: q = 4'b0010;
-		11'b101_1100000: q = 4'b0010;
-		11'b101_1100001: q = 4'b0010;
-		11'b101_1100010: q = 4'b0010;
-		11'b101_1100011: q = 4'b0010;
-		11'b101_1100100: q = 4'b0010;
-		11'b101_1100101: q = 4'b0010;
-		11'b101_1100110: q = 4'b0010;
-		11'b101_1100111: q = 4'b0010;
-		11'b101_1101000: q = 4'b0010;
-		11'b101_1101001: q = 4'b0010;
-		11'b101_1101010: q = 4'b0010;
-		11'b101_1101011: q = 4'b0010;
-		11'b101_1101100: q = 4'b0010;
-		11'b101_1101101: q = 4'b0010;
-		11'b101_1101110: q = 4'b0010;
-		11'b101_1101111: q = 4'b0010;
-		11'b101_1110000: q = 4'b0000;
-		11'b101_1110001: q = 4'b0000;
-		11'b101_1110010: q = 4'b0000;
-		11'b101_1110011: q = 4'b0000;
-		11'b101_1110100: q = 4'b0000;
-		11'b101_1110101: q = 4'b0000;
-		11'b101_1110110: q = 4'b0000;
-		11'b101_1110111: q = 4'b0000;
-		11'b101_1111000: q = 4'b0000;
-		11'b101_1111001: q = 4'b0000;
-		11'b101_1111010: q = 4'b0000;
-		11'b101_1111011: q = 4'b0000;
-		11'b101_1111100: q = 4'b0000;
-		11'b101_1111101: q = 4'b0000;
-		11'b101_1111110: q = 4'b0000;
-		11'b101_1111111: q = 4'b0000;
-		11'b110_0000000: q = 4'b0000;
-		11'b110_0000001: q = 4'b0000;
-		11'b110_0000010: q = 4'b0000;
-		11'b110_0000011: q = 4'b0000;
-		11'b110_0000100: q = 4'b0000;
-		11'b110_0000101: q = 4'b0000;
-		11'b110_0000110: q = 4'b0000;
-		11'b110_0000111: q = 4'b0000;
-		11'b110_0001000: q = 4'b0000;
-		11'b110_0001001: q = 4'b0000;
-		11'b110_0001010: q = 4'b0000;
-		11'b110_0001011: q = 4'b0000;
-		11'b110_0001100: q = 4'b0000;
-		11'b110_0001101: q = 4'b0000;
-		11'b110_0001110: q = 4'b0000;
-		11'b110_0001111: q = 4'b0000;
-		11'b110_0010000: q = 4'b0100;
-		11'b110_0010001: q = 4'b0100;
-		11'b110_0010010: q = 4'b0100;
-		11'b110_0010011: q = 4'b0100;
-		11'b110_0010100: q = 4'b0100;
-		11'b110_0010101: q = 4'b0100;
-		11'b110_0010110: q = 4'b0100;
-		11'b110_0010111: q = 4'b0100;
-		11'b110_0011000: q = 4'b0100;
-		11'b110_0011001: q = 4'b0100;
-		11'b110_0011010: q = 4'b0100;
-		11'b110_0011011: q = 4'b0100;
-		11'b110_0011100: q = 4'b0100;
-		11'b110_0011101: q = 4'b0100;
-		11'b110_0011110: q = 4'b0100;
-		11'b110_0011111: q = 4'b0100;
-		11'b110_0100000: q = 4'b0100;
-		11'b110_0100001: q = 4'b0100;
-		11'b110_0100010: q = 4'b0100;
-		11'b110_0100011: q = 4'b0100;
-		11'b110_0100100: q = 4'b0100;
-		11'b110_0100101: q = 4'b0100;
-		11'b110_0100110: q = 4'b0100;
-		11'b110_0100111: q = 4'b0100;
-		11'b110_0101000: q = 4'b1000;
-		11'b110_0101001: q = 4'b1000;
-		11'b110_0101010: q = 4'b1000;
-		11'b110_0101011: q = 4'b1000;
-		11'b110_0101100: q = 4'b1000;
-		11'b110_0101101: q = 4'b1000;
-		11'b110_0101110: q = 4'b1000;
-		11'b110_0101111: q = 4'b1000;
-		11'b110_0110000: q = 4'b1000;
-		11'b110_0110001: q = 4'b1000;
-		11'b110_0110010: q = 4'b1000;
-		11'b110_0110011: q = 4'b1000;
-		11'b110_0110100: q = 4'b1000;
-		11'b110_0110101: q = 4'b1000;
-		11'b110_0110110: q = 4'b1000;
-		11'b110_0110111: q = 4'b1000;
-		11'b110_0111000: q = 4'b1000;
-		11'b110_0111001: q = 4'b1000;
-		11'b110_0111010: q = 4'b1000;
-		11'b110_0111011: q = 4'b1000;
-		11'b110_0111100: q = 4'b1000;
-		11'b110_0111101: q = 4'b1000;
-		11'b110_0111110: q = 4'b1000;
-		11'b110_0111111: q = 4'b1000;
-		11'b110_1000000: q = 4'b0001;
-		11'b110_1000001: q = 4'b0001;
-		11'b110_1000010: q = 4'b0001;
-		11'b110_1000011: q = 4'b0001;
-		11'b110_1000100: q = 4'b0001;
-		11'b110_1000101: q = 4'b0001;
-		11'b110_1000110: q = 4'b0001;
-		11'b110_1000111: q = 4'b0001;
-		11'b110_1001000: q = 4'b0001;
-		11'b110_1001001: q = 4'b0001;
-		11'b110_1001010: q = 4'b0001;
-		11'b110_1001011: q = 4'b0001;
-		11'b110_1001100: q = 4'b0001;
-		11'b110_1001101: q = 4'b0001;
-		11'b110_1001110: q = 4'b0001;
-		11'b110_1001111: q = 4'b0001;
-		11'b110_1010000: q = 4'b0001;
-		11'b110_1010001: q = 4'b0001;
-		11'b110_1010010: q = 4'b0001;
-		11'b110_1010011: q = 4'b0001;
-		11'b110_1010100: q = 4'b0010;
-		11'b110_1010101: q = 4'b0010;
-		11'b110_1010110: q = 4'b0010;
-		11'b110_1010111: q = 4'b0010;
-		11'b110_1011000: q = 4'b0010;
-		11'b110_1011001: q = 4'b0010;
-		11'b110_1011010: q = 4'b0010;
-		11'b110_1011011: q = 4'b0010;
-		11'b110_1011100: q = 4'b0010;
-		11'b110_1011101: q = 4'b0010;
-		11'b110_1011110: q = 4'b0010;
-		11'b110_1011111: q = 4'b0010;
-		11'b110_1100000: q = 4'b0010;
-		11'b110_1100001: q = 4'b0010;
-		11'b110_1100010: q = 4'b0010;
-		11'b110_1100011: q = 4'b0010;
-		11'b110_1100100: q = 4'b0010;
-		11'b110_1100101: q = 4'b0010;
-		11'b110_1100110: q = 4'b0010;
-		11'b110_1100111: q = 4'b0010;
-		11'b110_1101000: q = 4'b0010;
-		11'b110_1101001: q = 4'b0010;
-		11'b110_1101010: q = 4'b0010;
-		11'b110_1101011: q = 4'b0010;
-		11'b110_1101100: q = 4'b0010;
-		11'b110_1101101: q = 4'b0010;
-		11'b110_1101110: q = 4'b0010;
-		11'b110_1101111: q = 4'b0010;
-		11'b110_1110000: q = 4'b0000;
-		11'b110_1110001: q = 4'b0000;
-		11'b110_1110010: q = 4'b0000;
-		11'b110_1110011: q = 4'b0000;
-		11'b110_1110100: q = 4'b0000;
-		11'b110_1110101: q = 4'b0000;
-		11'b110_1110110: q = 4'b0000;
-		11'b110_1110111: q = 4'b0000;
-		11'b110_1111000: q = 4'b0000;
-		11'b110_1111001: q = 4'b0000;
-		11'b110_1111010: q = 4'b0000;
-		11'b110_1111011: q = 4'b0000;
-		11'b110_1111100: q = 4'b0000;
-		11'b110_1111101: q = 4'b0000;
-		11'b110_1111110: q = 4'b0000;
-		11'b110_1111111: q = 4'b0000;
-		11'b111_0000000: q = 4'b0000;
-		11'b111_0000001: q = 4'b0000;
-		11'b111_0000010: q = 4'b0000;
-		11'b111_0000011: q = 4'b0000;
-		11'b111_0000100: q = 4'b0000;
-		11'b111_0000101: q = 4'b0000;
-		11'b111_0000110: q = 4'b0000;
-		11'b111_0000111: q = 4'b0000;
-		11'b111_0001000: q = 4'b0000;
-		11'b111_0001001: q = 4'b0000;
-		11'b111_0001010: q = 4'b0000;
-		11'b111_0001011: q = 4'b0000;
-		11'b111_0001100: q = 4'b0000;
-		11'b111_0001101: q = 4'b0000;
-		11'b111_0001110: q = 4'b0000;
-		11'b111_0001111: q = 4'b0000;
-		11'b111_0010000: q = 4'b0100;
-		11'b111_0010001: q = 4'b0100;
-		11'b111_0010010: q = 4'b0100;
-		11'b111_0010011: q = 4'b0100;
-		11'b111_0010100: q = 4'b0100;
-		11'b111_0010101: q = 4'b0100;
-		11'b111_0010110: q = 4'b0100;
-		11'b111_0010111: q = 4'b0100;
-		11'b111_0011000: q = 4'b0100;
-		11'b111_0011001: q = 4'b0100;
-		11'b111_0011010: q = 4'b0100;
-		11'b111_0011011: q = 4'b0100;
-		11'b111_0011100: q = 4'b0100;
-		11'b111_0011101: q = 4'b0100;
-		11'b111_0011110: q = 4'b0100;
-		11'b111_0011111: q = 4'b0100;
-		11'b111_0100000: q = 4'b0100;
-		11'b111_0100001: q = 4'b0100;
-		11'b111_0100010: q = 4'b0100;
-		11'b111_0100011: q = 4'b0100;
-		11'b111_0100100: q = 4'b0100;
-		11'b111_0100101: q = 4'b0100;
-		11'b111_0100110: q = 4'b0100;
-		11'b111_0100111: q = 4'b0100;
-		11'b111_0101000: q = 4'b0100;
-		11'b111_0101001: q = 4'b0100;
-		11'b111_0101010: q = 4'b0100;
-		11'b111_0101011: q = 4'b0100;
-		11'b111_0101100: q = 4'b1000;
-		11'b111_0101101: q = 4'b1000;
-		11'b111_0101110: q = 4'b1000;
-		11'b111_0101111: q = 4'b1000;
-		11'b111_0110000: q = 4'b1000;
-		11'b111_0110001: q = 4'b1000;
-		11'b111_0110010: q = 4'b1000;
-		11'b111_0110011: q = 4'b1000;
-		11'b111_0110100: q = 4'b1000;
-		11'b111_0110101: q = 4'b1000;
-		11'b111_0110110: q = 4'b1000;
-		11'b111_0110111: q = 4'b1000;
-		11'b111_0111000: q = 4'b1000;
-		11'b111_0111001: q = 4'b1000;
-		11'b111_0111010: q = 4'b1000;
-		11'b111_0111011: q = 4'b1000;
-		11'b111_0111100: q = 4'b1000;
-		11'b111_0111101: q = 4'b1000;
-		11'b111_0111110: q = 4'b1000;
-		11'b111_0111111: q = 4'b1000;
-		11'b111_1000000: q = 4'b0001;
-		11'b111_1000001: q = 4'b0001;
-		11'b111_1000010: q = 4'b0001;
-		11'b111_1000011: q = 4'b0001;
-		11'b111_1000100: q = 4'b0001;
-		11'b111_1000101: q = 4'b0001;
-		11'b111_1000110: q = 4'b0001;
-		11'b111_1000111: q = 4'b0001;
-		11'b111_1001000: q = 4'b0001;
-		11'b111_1001001: q = 4'b0001;
-		11'b111_1001010: q = 4'b0001;
-		11'b111_1001011: q = 4'b0001;
-		11'b111_1001100: q = 4'b0001;
-		11'b111_1001101: q = 4'b0001;
-		11'b111_1001110: q = 4'b0001;
-		11'b111_1001111: q = 4'b0001;
-		11'b111_1010000: q = 4'b0001;
-		11'b111_1010001: q = 4'b0001;
-		11'b111_1010010: q = 4'b0010;
-		11'b111_1010011: q = 4'b0010;
-		11'b111_1010100: q = 4'b0010;
-		11'b111_1010101: q = 4'b0010;
-		11'b111_1010110: q = 4'b0010;
-		11'b111_1010111: q = 4'b0010;
-		11'b111_1011000: q = 4'b0010;
-		11'b111_1011001: q = 4'b0010;
-		11'b111_1011010: q = 4'b0010;
-		11'b111_1011011: q = 4'b0010;
-		11'b111_1011100: q = 4'b0010;
-		11'b111_1011101: q = 4'b0010;
-		11'b111_1011110: q = 4'b0010;
-		11'b111_1011111: q = 4'b0010;
-		11'b111_1100000: q = 4'b0010;
-		11'b111_1100001: q = 4'b0010;
-		11'b111_1100010: q = 4'b0010;
-		11'b111_1100011: q = 4'b0010;
-		11'b111_1100100: q = 4'b0010;
-		11'b111_1100101: q = 4'b0010;
-		11'b111_1100110: q = 4'b0010;
-		11'b111_1100111: q = 4'b0010;
-		11'b111_1101000: q = 4'b0010;
-		11'b111_1101001: q = 4'b0010;
-		11'b111_1101010: q = 4'b0010;
-		11'b111_1101011: q = 4'b0010;
-		11'b111_1101100: q = 4'b0010;
-		11'b111_1101101: q = 4'b0010;
-		11'b111_1101110: q = 4'b0010;
-		11'b111_1101111: q = 4'b0010;
-		11'b111_1110000: q = 4'b0000;
-		11'b111_1110001: q = 4'b0000;
-		11'b111_1110010: q = 4'b0000;
-		11'b111_1110011: q = 4'b0000;
-		11'b111_1110100: q = 4'b0000;
-		11'b111_1110101: q = 4'b0000;
-		11'b111_1110110: q = 4'b0000;
-		11'b111_1110111: q = 4'b0000;
-		11'b111_1111000: q = 4'b0000;
-		11'b111_1111001: q = 4'b0000;
-		11'b111_1111010: q = 4'b0000;
-		11'b111_1111011: q = 4'b0000;
-		11'b111_1111100: q = 4'b0000;
-		11'b111_1111101: q = 4'b0000;
-		11'b111_1111110: q = 4'b0000;
-		11'b111_1111111: q = 4'b0000;
-	endcase
diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen
index dadc5dc5..06615165 100755
Binary files a/pipelined/srt/sqrttestgen and b/pipelined/srt/sqrttestgen differ
diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index b4ece147..76c6a664 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -30,15 +30,11 @@ void main(void)
   FILE *fptr;
   double aFrac, rFrac;
   int    aExp,  rExp;
-  double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
+  double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
-			  1.1, 1.2, 1.01, 1.001, 1.0001,
-<<<<<<< Updated upstream
-			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
-=======
+			  1.1, 1.5, 1.01, 1.001, 1.0001,
 			  2/1.1, 2/1.5, 2/1.25, 2/1.125};
->>>>>>> Stashed changes
-  double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
         11, 12, 13, 14, 15, 16};
   int i;
   int bias = 1023;
@@ -51,10 +47,19 @@ void main(void)
   for (i=0; i<ENTRIES; i++) {
     aFrac = mans[i];
     aExp  = exps[i] + bias;
-    rFrac = sqrt(aFrac * pow(2, aExp - bias));
+    rFrac = sqrt(aFrac * pow(2, exps[i]));
     rExp  = (int) (log(rFrac)/log(2) + bias);
     output(fptr, aExp, aFrac, rExp, rFrac);
   }
+
+  //                                  WS
+  // Test 1: sqrt(1) = 1              0000 0000 0000 00
+  // Test 2: sqrt(1849/1024) = 43/32  0000 1100 1110 01
+  // Test 3: sqrt(5)                  0000 0100 0000 00
+  // Test 4: sqrt(9) = 3              1111 1001 0000 00
+  // Test 5: sqrt(17)                 0000 0001 0000 00
+  // Test 6: sqrt(56)                 1111 1110 0000 00
+  // Test 7: sqrt(120)                0000 1110 0000 00
   
   // for (i = 0; i< RANDOM_VECS; i++) {
   //   a = random_input();
@@ -69,14 +74,23 @@ void main(void)
 
 void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
 {
+  // Print a in standard double format
   fprintf(fptr, "%03x", aExp);
   printhex(fptr, aFrac);
   fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
+  fprintf(fptr, "_");
+
+  // Print r in standard double format
   fprintf(fptr, "%03x", rExp);
   printhex(fptr, rFrac);
+  fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
-
-
 }
 
 void printhex(FILE *fptr, double m)
diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do
index 340c5b1f..1e0c3f28 100644
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@@ -1,5 +1,5 @@
 add wave -noupdate /testbench/*
 add wave -noupdate /testbench/srt/*
-add wave -noupdate /testbench/srt/otfc2/*
+add wave -noupdate /testbench/srt/sotfc2/*
 add wave -noupdate /testbench/srt/preproc/*
 add wave -noupdate /testbench/srt/divcounter/*
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 6e8cd560..949335bf 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -29,8 +29,6 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 `include "wally-config.vh"
-`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
-`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
 
 module srt (
   input  logic clk,
@@ -49,18 +47,19 @@ module srt (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign, done,
-  output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
+  output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers
   output logic [`NE-1:0] rExp,
   output logic [3:0] Flags
 );
 
-  logic           qp, qz, qm; // quotient is +1, 0, or -1
-  logic [`NE-1:0] calcExp;
-  logic           calcSign;
-  logic [`DIVLEN+3:0]  X, Dpreproc;
-  logic [`DIVLEN+3:0]  WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
+  logic                       qp, qz, qn; // quotient is +1, 0, or -1
+  logic [`NE-1:0]             calcExp;
+  logic                       calcSign;
+  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, AddIn;
+  logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
-  logic           intSign;
+  logic                       intSign;
+  logic                       cin;
  
   srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
 
@@ -76,23 +75,31 @@ module srt (
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn);
 
   flopen #(`NE) expflop(clk, Start, calcExp, rExp);
   flopen #(1) signflop(clk, Start, calcSign, rsign);
   flopen #(7) durflop(clk, Start, calcDur, dur);
   
-  counter divcounter(clk, Start, dur, done);
+  srtcounter divcounter(clk, Start, dur, done);
 
   // Divisor Selection logic
   assign Db = ~D;
-  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
+  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
+
+  // If only implementing division, use divide otfc
+  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
+  // otherwise use sotfc
+  creg   sotfcC(clk, Start, C);
+  sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
+
+  // Adder input selection
+  assign AddIn = Sqrt ? F : Dsel;
 
   // Partial Product Generation
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
+  assign cin = ~Sqrt & qp;
+  csa    #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
   
-  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
-
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
   signcalc signcalc(.XSign, .YSign, .calcSign);
@@ -121,42 +128,53 @@ module srtpreproc (
 
   logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
+  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
+  logic  [`NF+4:0] SqrtX;
 
+  // Generate positive integer inputs if they are signed
   assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
   assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
 
+  // Calculate leading zeros of integer inputs
   lzc #(`XLEN) lzcA (PosA, zeroCntA);
   lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
+  // Make integers have DIVLEN bits
   assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
   assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
 
+  // Shift integers to have leading ones
   assign PreprocA = ExtraA << (zeroCntA + 1);
   assign PreprocB = ExtraB << (zeroCntB + 1);
+
+  // Make mantissas have DIVLEN bits
   assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
   assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
 
+  // Selecting correct divider inputs
   assign DivX = Int ? PreprocA : PreprocX;
-  assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
-
-  assign X = Sqrt ? SqrtX : {4'b0001, DivX};
+  assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
+  assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
+
+  // Integer exponent and sign calculations
   assign intExp = zeroCntB - zeroCntA + 1;
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
-  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
+  // Number of cycles of divider
+  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
 endmodule
 
 /////////////////////////////////
 // Quotient Selection, Radix 2 //
 /////////////////////////////////
 module qsel2 ( // *** eventually just change to 4 bits
-  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
-  output logic         qp, qz, qm
+  input  logic [`DIVLEN+3:`DIVLEN-1] ps, pc, 
+  input  logic         Sqrt,
+  output logic         qp, qz, qn
 );
  
-  logic [`DIVLEN+3:`DIVLEN]  p, g;
+  logic [`DIVLEN+3:`DIVLEN-1]  p, g;
   logic          magnitude, sign, cout;
 
   // The quotient selection logic is presented for simplicity, not
@@ -167,8 +185,8 @@ module qsel2 ( // *** eventually just change to 4 bits
   assign p = ps ^ pc;
   assign g = ps & pc;
 
-  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
-  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
+  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
+  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1])))));
   assign #1 sign = p[`DIVLEN+3] ^ cout;
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
@@ -180,7 +198,7 @@ module qsel2 ( // *** eventually just change to 4 bits
   // Produce quotient = +1, 0, or -1
   assign #1 qp = magnitude & ~sign;
   assign #1 qz = ~magnitude;
-  assign #1 qm = magnitude & sign;
+  assign #1 qn = magnitude & sign;
 endmodule
 
 ////////////////////////////////////
@@ -191,45 +209,36 @@ module fsel2 (
   input  logic [`DIVLEN+3:0] C, S, SM,
   output logic [`DIVLEN+3:0] F
 );
-  logic [`DIVLEN+3:0] FP, FN;
+  logic [`DIVLEN+3:0] FP, FN, FZ;
   
   // Generate for both positive and negative bits
   assign FP = ~S & C;
   assign FN = SM | (C & (~C << 2));
+  assign FZ = {(`DIVLEN+4){1'b0}};
 
   // Choose which adder input will be used
 
-  assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0});
+  assign F = sp ? FP : (sn ? FN : FZ);
 
 endmodule
 
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
-module otfc2 #(parameter N=64) (
+module otfc2 #(parameter N=66) (
   input  logic         clk,
   input  logic         Start,
-  input  logic         qp, qz, qm,
-  output logic [N-1:0] r
+  input  logic         qp, qz, qn,
+  output logic [N-3:0] r
 );
-
   //  The on-the-fly converter transfers the quotient 
-  //  bits to the quotient as they come. 
-  //
-  //  This code follows the psuedocode presented in the 
-  //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-2 division.
-  //
-  //  QM is Q-1. It allows us to write negative bits 
-  //  without using a costly CPA. 
+  //  bits to the quotient as they come.
+  //  Use this otfc for division only.
   logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
-  //  QR and QMR are the shifted versions of Q and QM.
-  //  They are treated as [N-1:r] size signals, and 
-  //  discard the r most significant bits of Q and QM. 
   logic [N+1:0] QR, QMR;
 
   flopr #(N+3) Qreg(clk, Start, QNext, Q);
-  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
+  mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   always_comb begin
@@ -241,35 +250,76 @@ module otfc2 #(parameter N=64) (
     end else if (qz) begin
       QNext  = {QR,  1'b0};
       QMNext = {QMR, 1'b1};
-    end else begin        // If qp and qz are not true, then qm is
+    end else begin        // If qp and qz are not true, then qn is
       QNext  = {QMR, 1'b1};
       QMNext = {QMR, 1'b0};
     end 
   end
-  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+  assign r = Q[N] ? Q[N-1:2] : Q[N-2:1];
 
 endmodule
 
 ///////////////////////////////
 // Square Root OTFC, Radix 2 //
 ///////////////////////////////
-module softc2(
-  input  logic clk,
-  input  logic Start,
-  input  logic sp, sn,
-  output logic S,
+module sotfc2(
+  input  logic         clk,
+  input  logic         Start,
+  input  logic         sp, sn,
+  input  logic [`DIVLEN+3:0] C,
+  output logic [`DIVLEN-2:0] Sq,
+  output logic [`DIVLEN+3:0] F
 );
+  //  The on-the-fly converter transfers the square root 
+  //  bits to the quotient as they come.
+  //  Use this otfc for division and square root.
+  logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
+
+  flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
+  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
+  flop #(`DIVLEN+4) Sreg(clk, SMux, S);
+
+  always_comb begin
+    if (sp) begin
+      SNext  = S | ((C << 1) & ~(C << 2));
+      SMNext = S;
+    end else if (sn) begin
+      SNext  = SM | ((C << 1) & ~(C << 2));
+      SMNext = SM;
+    end else begin        // If sp and sn are not true, then sz is
+      SNext  = S;
+      SMNext = SM | ((C << 1) & ~(C << 2));
+    end 
+  end
+  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
+
+  fsel2 fsel(sp, sn, C, S, SM, F);
 
 endmodule
+
+//////////////////////////
+// C Register for SOTFC //
+//////////////////////////
+module creg(input  logic clk,             // clock passed to the cflop register
+            input  logic Start,           // select input of Cmux
+            output logic [`DIVLEN+3:0] C  // registered C value, DIVLEN+4 bits wide
+);
+  logic [`DIVLEN+3:0] CMux;               // Cmux output, the value fed to cflop
+  // Cmux inputs: C shifted right by one with a 1 inserted at the top, or six 1s followed by DIVLEN-2 zeros (NOTE(review): presumably the second is chosen when Start is high - confirm against mux2)
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
+  flop #(`DIVLEN+4) cflop(clk, CMux, C);  // registers CMux into C
+endmodule
+
 /////////////
 // counter //
 /////////////
-module counter(input  logic clk, 
-               input  logic req, 
-               input  logic [$clog2(`XLEN+1)-1:0] dur,
-               output logic done);
+module srtcounter(input  logic clk, 
+                  input  logic req, 
+                  input  logic [$clog2(`XLEN+1)-1:0] dur,
+                  output logic done
+);
  
-   logic    [$clog2(`XLEN+1)-1:0]  count;
+  logic    [$clog2(`XLEN+1)-1:0]  count;
 
   // This block of control logic sequences the divider
   // through its iterations.  You may modify it if you
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 83f33707..39696af4 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -1,4 +1,4 @@
-`define DIVLEN 64
+`include "wally-config.vh"
 
 /////////////
 // counter //
@@ -39,37 +39,27 @@ endmodule
 // testbench //
 //////////
 module testbench;
-  logic              clk;
-  logic              req;
-  logic              done;
-  logic              Int;
-  logic [63:0]       a, b;
-  logic [51:0]       afrac, bfrac;
-  logic [10:0]       aExp, bExp;
-  logic              asign, bsign;
-  logic [51:0]       r;
-  logic [63:0]       rInt;
-  logic [`DIVLEN-1:0]  Quot;
+  logic               clk;
+  logic               req;
+  logic               done;
+  logic               Int;
+  logic [`XLEN-1:0]   a, b;
+  logic [`NF-1:0]     afrac, bfrac;
+  logic [`NE-1:0]     aExp, bExp;
+  logic               asign, bsign;
+  logic [`NF-1:0]     r;
+  logic [`XLEN-1:0]   rInt;
+  logic [`DIVLEN-2:0] Quot;
  
   // Test parameters
   parameter MEM_SIZE = 40000;
   parameter MEM_WIDTH = 64+64+64+64;
  
-  // INT TEST SIZES
-  // `define memrem  63:0 
-  // `define memr  127:64
-  // `define memb  191:128
-  // `define mema  255:192
-
-  // FLOAT TEST SIZES
-  // `define memr  63:0 
-  // `define memb  127:64
-  // `define mema  191:128
-
-  // SQRT TEST SIZES 
-  `define memr  63:0 
-  `define mema  127:64
+  // Test sizes
+  `define memrem  63:0 
+  `define memr  127:64
   `define memb  191:128
+  `define mema  255:192
 
   // Test logicisters
   logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
@@ -118,16 +108,16 @@ module testbench;
       b = Vec[`memb];
       {bsign, bExp, bfrac} = b;
       nextr = Vec[`memr];
-      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
-      rInt = Quot;
+      r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
+      rInt = {1'b1, Quot};
       req <= #5 1;
     end
   
   // Apply directed test vectors read from file.
 
   always @(posedge clk) begin
-    r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
-    rInt = Quot;
+    r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
+    rInt = {1'b1, Quot};
     if (done) begin
       if (~Int & ~Sqrt) begin
         req <= #5 1;
@@ -165,15 +155,14 @@ module testbench;
         req <= #5 1;
         diffp = correctr[51:0] - r;
         diffn = r - correctr[51:0];
-        if (rExp !== correctr[62:52]) // check if accurate to 1 ulp
+        if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
           begin
             errors = errors + 1;
             $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
             $display("failed\n");
-            $stop;
           end
         if (afrac === 52'hxxxxxxxxxxxxx) begin 
-          $display("%d Tests completed successfully", testnum);
+          $display("%d Tests completed successfully", testnum-errors);
           $stop; end 
       end
     end
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index fa46a060..b90c3d3d 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -80,17 +80,17 @@ module testbenchfp;
   logic CvtResSgnE;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`DIVLEN+2:0] Quot;
+	logic [`QLEN-1-(`RADIX/4):0] Quot;
   logic CvtResDenormUfE;
-  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
+  logic [`DURLEN-1:0] EarlyTermShift;
   logic DivStart, DivBusy;
   logic reset = 1'b0;
   logic [`DIVLEN-1:0]    DivX;
   logic [`DIVLEN-1:0]  Dpreproc;
-  logic [`DIVLEN+3:0]  WSN, WS;
-  logic [`DIVLEN+3:0]  WCN, WC;
+  logic [`DIVLEN+3:0]  NextWSN, WS;
+  logic [`DIVLEN+3:0]  NextWCN, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
 
   // in-between FMA signals
   logic                 Mult;
@@ -679,15 +679,15 @@ module testbenchfp;
           .Pe, .ZmSticky, .KillProd); 
               
   postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
-              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
-              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky,
+              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky),
+              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
-              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-              .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
+              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
+              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   
   fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), 
             .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
@@ -695,11 +695,10 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
-                .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
+  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), 
+                  .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), 
+                  .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp),
+                  .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone);
 
   assign CmpFlg[3:0] = 0;
 
@@ -854,7 +853,7 @@ end
 
     // check if result is correct
     //  - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index 8f6f99a5..0fb5f5e6 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -114,6 +114,7 @@ logic [3:0] dummy;
         "arch32f":      if (`F_SUPPORTED) tests = arch32f;
         "imperas32i":                     tests = imperas32i;
         "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
+        // "wally32d":     if (`D_SUPPORTED) tests = wally32d;
         "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
         "wally32a":     if (`A_SUPPORTED) tests = wally32a;
         "imperas32c":   if (`C_SUPPORTED) tests = imperas32c;
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 8971e544..4b1b9a16 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -34,7 +34,7 @@
 string tvpaths[] = '{
     "../../addins/imperas-riscv-tests/work/",
     "../../tests/riscof/work/riscv-arch-test/",
-    "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", 
+    "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", //
     "../../tests/imperas-riscv-tests/work/",
     "../../benchmarks/coremark/work/",
     "../../addins/embench-iot/"
diff --git a/synthDC/Makefile b/synthDC/Makefile
index 369529e3..98b71942 100755
--- a/synthDC/Makefile
+++ b/synthDC/Makefile
@@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1)
 	cp -f ../pipelined/regression/power.saif .
 endif
 	dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out
-	rm -rf $(OUTPUTDIR)/hdl
+#	rm -rf $(OUTPUTDIR)/hdl
 	rm -rf $(OUTPUTDIR)/WORK
 	rm -rf $(OUTPUTDIR)/alib-52
 
diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py
index 93363a06..978365b1 100755
--- a/synthDC/extractSummary.py
+++ b/synthDC/extractSummary.py
@@ -11,6 +11,7 @@ import numpy as np
 from ppa.ppaAnalyze import noOutliers
 from matplotlib import ticker
 import argparse
+import os
 
 
 def synthsintocsv():
@@ -59,6 +60,7 @@ def synthsintocsv():
             writer.writerow([width, config, special, tech, freq, delay, area])
     file.close()
 
+	
 def synthsfromcsv(filename):
     Synth = namedtuple("Synth", "width config special tech freq delay area")
     with open(filename, newline='') as csvfile:
@@ -74,10 +76,16 @@ def synthsfromcsv(filename):
             allSynths[i] = Synth(*allSynths[i])
     return allSynths
 
+
 def freqPlot(tech, width, config):
     ''' plots delay, area for syntheses with specified tech, module, width
     '''
 
+    # Make sure the plot output directory exists; exist_ok avoids the check-then-create race.
+    current_directory = os.getcwd()
+    final_directory = os.path.join(current_directory, 'plots/wally')
+    os.makedirs(final_directory, exist_ok=True)
+
     freqsL, delaysL, areasL = ([[], []] for i in range(3))
     for oneSynth in allSynths:
         if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special):
@@ -151,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False):
 
     return fig
 
+
 def plotFeatures(tech, width, config):
     delays, areas, labels = ([] for i in range(3))
     freq = techdict[tech].targfreq
@@ -168,7 +177,8 @@ def plotFeatures(tech, width, config):
     titlestr = tech+'_'+width+config
     plt.title(titlestr)
     plt.savefig('./plots/wally/features_'+titlestr+'.png')
-    
+
+	
 def plotConfigs(tech, special=''):
     delays, areas, labels = ([] for i in range(3))
     freq = techdict[tech].targfreq
@@ -207,7 +217,8 @@ def normAreaDelay(special=''):
     ax.set_ylabel('Area (add32)')        
     ax.legend(handles = fullLeg, loc='upper left')
     plt.savefig('./plots/wally/normAreaDelay.png')
-    
+
+	
 def addFO4axis(fig, ax, tech):
     fo4 = techdict[tech].fo4
 
diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index 9f2b4647..9b72849f 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true"
 # Due to parameterized Verilog must use analyze/elaborate and not 
 # read_verilog/vhdl (change to pull in Verilog and/or VHDL)
 #
-set alib_library_analysis_path ./$outputDir
+#set alib_library_analysis_path ./$outputDir
 define_design_lib WORK -path ./$outputDir/WORK
 analyze -f sverilog -lib WORK $my_verilog_files
 elaborate $my_toplevel -lib WORK 
@@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }
 
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index 830b9eef..af67a535 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
 current_dir = $(shell pwd)
 XLEN    ?= 64
 
-all: root build_arch # build_wally memfile
+all: root build_arch #build_wally memfile
 
 root:
 	mkdir -p $(work_dir)
diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py
index 88a6269e..fd429395 100644
--- a/tests/riscof/spike/riscof_spike.py
+++ b/tests/riscof/spike/riscof_spike.py
@@ -108,7 +108,7 @@ class spike(pluginTemplate):
 
       #TODO: The following assumes you are using the riscv-gcc toolchain. If
       #      not please change appropriately
-      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
+      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))
 
     def runTests(self, testList):
 
@@ -158,7 +158,12 @@ class spike(pluginTemplate):
 	  # echo statement.
           if self.target_run:
             # set up the simulation command. Template is for spike. Please change.
-            simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
+            if ('NO_SAIL=True' in testentry['macros']):
+                # tests that can't run on SAIL: copy the reference output into the signature file (only the trailing .S extension is rewritten)
+                reference_output = re.sub("/src/","/references/", re.sub(r"\.S$",".reference_output", test))
+                simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
+            else:
+                simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
           else:
             simcmd = 'echo "NO RUN"'
 
diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml
index 5a76fd97..04a5da18 100644
--- a/tests/riscof/spike/spike_rv32imc_isa.yaml
+++ b/tests/riscof/spike/spike_rv32imc_isa.yaml
@@ -1,11 +1,11 @@
 hart_ids: [0]
 hart0:
-  ISA: RV32IMAFCZicsr_Zifencei
+  ISA: RV32IMAFDCZicsr_Zifencei
   physical_addr_sz: 32
   User_Spec_Version: '2.3'
   supported_xlen: [32]
   misa:
-   reset-val: 0x40001125
+   reset-val: 0x4000112D
    rv32:
      accessible: true
      mxl:
@@ -23,6 +23,6 @@ hart0:
            warl:
               dependency_fields: []
               legal:
-                - extensions[25:0] bitmask [0x0001125, 0x0000000]
+                - extensions[25:0] bitmask [0x000112D, 0x0000000]
               wr_illegal:
                 - Unchanged
\ No newline at end of file