From cd53ae67d9a2ba5f5ec8ace9cacaf3de56e3ccb2 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 8 Jul 2022 23:56:57 +0000
Subject: [PATCH 01/36] moved fpu ieu write data mux to lsu

---
 pipelined/regression/sim-wally-batch |  2 +-
 pipelined/src/cache/cache.sv         | 14 ++++----------
 pipelined/src/ifu/ifu.sv             |  2 +-
 pipelined/src/lsu/lsu.sv             | 17 +++++++++++------
 pipelined/testbench/testbench.sv     |  1 +
 pipelined/testbench/tests.vh         |  2 +-
 tests/riscof/Makefile                |  2 +-
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch
index 91f11697..7e821e58 100755
--- a/pipelined/regression/sim-wally-batch
+++ b/pipelined/regression/sim-wally-batch
@@ -1 +1 @@
-vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f"
+vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index f6aad78e..b80df13a 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -42,10 +42,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   input logic [11:0]          NextAdr, // virtual address, but we only use the lower 12 bits.
   input logic [`PA_BITS-1:0]  PAdr, // physical address
   input logic [(`XLEN-1)/8:0] ByteMask,
-  input logic [`XLEN-1:0]     FinalWriteData,
-  input logic [`FLEN-1:0]     FWriteDataM,
+  input logic [WORDLEN-1:0]     FinalWriteData,
   input logic                        FLoad2,
-  input logic                 FpLoadStoreM,
   output logic                CacheCommitted,
   output logic                CacheStall,
    // to performance counters to cpu
@@ -72,7 +70,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   localparam                  SETLEN = $clog2(NUMLINES);
   localparam                  SETTOP = SETLEN+OFFSETLEN;
   localparam                  TAGLEN = `PA_BITS - SETTOP;
-  localparam                  WORDSPERLINE = LINELEN/`XLEN;
+  localparam                  WORDSPERLINE = LINELEN/WORDLEN;
   localparam                  FlushAdrThreshold   = NUMLINES - 1;
 
   logic                       SelAdr;
@@ -162,12 +160,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Write Path: Write data and address. Muxes between writes from bus and writes from CPU.
   /////////////////////////////////////////////////////////////////////////////////////////////
-  if (`LLEN>`XLEN)
-    mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
-      .d1({WORDSPERLINE/2{FWriteDataM}}),	.d2(CacheBusWriteData),	.s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData));
-  else
-    mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
-      .d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
+  mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}),
+  .d1(CacheBusWriteData),	.s(SetValid), .y(CacheWriteData));
   mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}),
 		.d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}),
diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv
index 0c43c736..c96396ab 100644
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@@ -226,7 +226,7 @@ module ifu (
       icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
              .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
              .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), 
-             .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(),
+             .CacheFetchLine(ICacheFetchLine), .FLoad2(),
              .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
              .Cacheable(CacheableF),
              .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index 4b200f70..a63f813e 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -192,7 +192,8 @@ module lsu (
   //  Memory System
   //  Either Data Cache or Data Tightly Integrated Memory or just bus interface
   /////////////////////////////////////////////////////////////////////////////////////////////
-  logic [`XLEN-1:0]    AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
+  logic [`XLEN-1:0]    AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM;
+  logic [`LLEN-1:0]    FinalWriteDataM;
   logic [`LLEN-1:0]    ReadDataWordM, LittleEndianReadDataWordM;
   logic [`LLEN-1:0]    ReadDataWordMuxM;
   logic                IgnoreRequest;
@@ -202,7 +203,7 @@ module lsu (
   if (`DMEM == `MEM_TIM) begin : dtim
     // *** directly instantiate RAM or ROM here.  Instantiate SRAM1P1RW.  
     // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
-    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, 
+    dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData
               .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
               .DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
               .DCacheMiss, .DCacheAccess);
@@ -230,15 +231,19 @@ module lsu (
 
     mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
       .s(SelUncachedAdr), .y(ReadDataWordMuxM));
-    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
+    mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM),
       .s(SelUncachedAdr), .y(LSUBusHWDATA));
     
     if(CACHE_ENABLED) begin : dcache
+      if (`LLEN>`FLEN)
+        mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
+      else
+        assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM;
       cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
               .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
         .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
         .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), 
-        .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2,
+        .ByteMask(ByteMaskM), .WordCount, .FLoad2,
         .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
         .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
         .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), 
@@ -286,10 +291,10 @@ module lsu (
   //  swap the bytes when read from big-endian memory
   /////////////////////////////////////////////////////////////////////////////////////////////
   if (`BIGENDIAN_SUPPORTED) begin:endian
-    bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
+    bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM));
     bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
   end else begin
-    assign FinalWriteDataM = LittleEndianWriteDataM;
+    assign IEUWriteDataM = LittleEndianWriteDataM;
     assign LittleEndianReadDataWordM = ReadDataWordM;
   end
 
diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index c248a750..40ea9a58 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -114,6 +114,7 @@ logic [3:0] dummy;
         "arch32f":      if (`F_SUPPORTED) tests = arch32f;
         "imperas32i":                     tests = imperas32i;
         "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
+        "wally32d":     if (`D_SUPPORTED) tests = wally32d;
         "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
         "wally32a":     if (`A_SUPPORTED) tests = wally32a;
         "imperas32c":   if (`C_SUPPORTED) tests = imperas32c;
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 0d57dbdd..c6ebf08c 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -34,7 +34,7 @@
 string tvpaths[] = '{
     "../../addins/imperas-riscv-tests/work/",
     "../../tests/riscof/work/riscv-arch-test/",
-    "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", 
+    "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", //
     "../../tests/imperas-riscv-tests/work/",
     "../../benchmarks/coremark/work/",
     "../../addins/embench-iot/"
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index 830b9eef..621a5b54 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
 current_dir = $(shell pwd)
 XLEN    ?= 64
 
-all: root build_arch # build_wally memfile
+all: root build_arch build_wally memfile
 
 root:
 	mkdir -p $(work_dir)

From ca4fe08fd9bbb8b9b38862c31ee8583f02a5c873 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Sat, 9 Jul 2022 00:26:45 +0000
Subject: [PATCH 02/36] renamed FLoad2 to FStore2

---
 pipelined/src/cache/cache.sv              | 4 ++--
 pipelined/src/cache/cacheway.sv           | 4 ++--
 pipelined/src/fpu/fpu.sv                  | 6 +++---
 pipelined/src/ifu/ifu.sv                  | 2 +-
 pipelined/src/lsu/lsu.sv                  | 8 ++++----
 pipelined/src/wally/wallypipelinedcore.sv | 6 +++---
 pipelined/testbench/tests.vh              | 2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index b80df13a..ca6a5c9c 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -43,7 +43,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
   input logic [`PA_BITS-1:0]  PAdr, // physical address
   input logic [(`XLEN-1)/8:0] ByteMask,
   input logic [WORDLEN-1:0]     FinalWriteData,
-  input logic                        FLoad2,
+  input logic                        FStore2,
   output logic                CacheCommitted,
   output logic                CacheStall,
    // to performance counters to cpu
@@ -121,7 +121,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
 
   // Array of cache ways, along with victim, hit, dirty, and read merging logic
   cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) 
-    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2,
+    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2,
     .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
     .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, 
     .Invalidate(InvalidateCacheM));
diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv
index ac1e26e8..d1c85675 100644
--- a/pipelined/src/cache/cacheway.sv
+++ b/pipelined/src/cache/cacheway.sv
@@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
   input logic [$clog2(NUMLINES)-1:0] RAdr,
   input logic [`PA_BITS-1:0]         PAdr,
   input logic [LINELEN-1:0]          CacheWriteData,
-  input logic                        FLoad2,
+  input logic                        FStore2,
   input logic                        SetValidWay,
   input logic                        ClearValidWay,
   input logic                        SetDirtyWay,
@@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
     logic [2**LOGWPL-1:0] MemPAdrDecodedtmp;
     onehotdecoder #(LOGWPL) adrdec(
       .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp));
-    assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0};
+    assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0};
   end else
     onehotdecoder #(LOGWPL) adrdec(
       .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 7cf10901..255feb44 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -42,7 +42,7 @@ module fpu (
   input logic [1:0]        STATUS_FS, // Is floating-point enabled?
   output logic 		   FRegWriteM, // FP register write enable
   output logic 		   FpLoadStoreM, // Fp load instruction?
-  output logic              FLoad2,
+  output logic              FStore2,
   output logic 		   FStallD, // Stall the decode stage
   output logic 		   FWriteIntE, // integer register write enables
   output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
@@ -298,8 +298,8 @@ module fpu (
       assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
    end else begin
       logic [`FLEN-1:0] FWriteDataE;
-      if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT;
-      else assign FLoad2 = FmtM;
+      if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
+      else assign FStore2 = FmtM;
 
       if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
       else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv
index c96396ab..5c2f799d 100644
--- a/pipelined/src/ifu/ifu.sv
+++ b/pipelined/src/ifu/ifu.sv
@@ -226,7 +226,7 @@ module ifu (
       icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0),
              .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
              .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), 
-             .CacheFetchLine(ICacheFetchLine), .FLoad2(),
+             .CacheFetchLine(ICacheFetchLine), .FStore2(),
              .CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
              .Cacheable(CacheableF),
              .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index a63f813e..50ecdb18 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -58,7 +58,7 @@ module lsu (
    input logic              sfencevmaM,
    // fpu
    input logic [`FLEN-1:0]  FWriteDataM,
-   input logic              FLoad2,
+   input logic              FStore2,
    input logic              FpLoadStoreM,
    // faults
    output logic             LoadPageFaultM, StoreAmoPageFaultM,
@@ -236,14 +236,14 @@ module lsu (
     
     if(CACHE_ENABLED) begin : dcache
       if (`LLEN>`FLEN)
-        mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
+        mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
       else
-        assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM;
+        assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
       cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
               .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
         .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
         .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), 
-        .ByteMask(ByteMaskM), .WordCount, .FLoad2,
+        .ByteMask(ByteMaskM), .WordCount, .FStore2,
         .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
         .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
         .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), 
diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv
index 7538a541..372f4aba 100644
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@@ -93,7 +93,7 @@ module wallypipelinedcore (
   logic             FStallD;
   logic             FWriteIntE;
   logic [`XLEN-1:0]         FWriteDataE;
-  logic                     FLoad2;
+  logic                     FStore2;
   logic [`FLEN-1:0]         FWriteDataM;
   logic [`XLEN-1:0]         FIntResM;  
   logic [`XLEN-1:0]         FCvtIntResW;  
@@ -259,7 +259,7 @@ module wallypipelinedcore (
   .CommittedM, .DCacheMiss, .DCacheAccess,
   .SquashSCW,            
   .FpLoadStoreM,
-  .FWriteDataM, .FLoad2,
+  .FWriteDataM, .FStore2,
   //.DataMisalignedM(DataMisalignedM),
   .IEUAdrE, .IEUAdrM, .WriteDataE,
   .ReadDataW, .FlushDCacheM,
@@ -400,7 +400,7 @@ module wallypipelinedcore (
          .STATUS_FS, // is floating-point enabled?
          .FRegWriteM, // FP register write enable
          .FpLoadStoreM,
-         .FLoad2,
+         .FStore2,
          .FStallD, // Stall the decode stage
          .FWriteIntE, // integer register write enable
          .FWriteDataE, // Data to be written to memory
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index c6ebf08c..e1b69cd2 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -34,7 +34,7 @@
 string tvpaths[] = '{
     "../../addins/imperas-riscv-tests/work/",
     "../../tests/riscof/work/riscv-arch-test/",
-    "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", //
+    "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", //
     "../../tests/imperas-riscv-tests/work/",
     "../../benchmarks/coremark/work/",
     "../../addins/embench-iot/"

From b728e5054d2fb581321985668efabfca439b61b6 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 11 Jul 2022 18:30:21 -0700
Subject: [PATCH 03/36] variable iterations implemented in radix-4 divider

---
 pipelined/config/rv64fp/wally-config.vh |   4 +-
 pipelined/config/shared/wally-shared.vh |  12 +-
 pipelined/regression/wave-fpu.do        |  10 +-
 pipelined/src/fpu/divshiftcalc.sv       |   9 +-
 pipelined/src/fpu/divsqrt.sv            |  10 +-
 pipelined/src/fpu/flags.sv              |  38 ++--
 pipelined/src/fpu/fmashiftcalc.sv       |  24 +-
 pipelined/src/fpu/fpu.sv                |  10 +-
 pipelined/src/fpu/lzacorrection.sv      |   4 +-
 pipelined/src/fpu/postprocess.sv        |  26 +--
 pipelined/src/fpu/resultselect.sv       | 290 ------------------------
 pipelined/src/fpu/resultsign.sv         |   4 +-
 pipelined/src/fpu/round.sv              |   6 +-
 pipelined/src/fpu/srt-radix4.sv         | 140 ++++++++----
 pipelined/src/fpu/srtfsm.sv             |  10 +-
 pipelined/src/fpu/srtpreproc.sv         |  16 +-
 pipelined/testbench/testbench-fp.sv     |  14 +-
 17 files changed, 203 insertions(+), 424 deletions(-)
 delete mode 100644 pipelined/src/fpu/resultselect.sv

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index b92bc07a..cc8d1b2b 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -32,14 +32,14 @@
 `define DESIGN_COMPILER 0
 
 // RV32 or RV64: XLEN = 32 or 64
-`define XLEN 64
+`define XLEN 32
 
 // IEEE 754 compliance
 `define IEEE754 0
 
 // MISA RISC-V configuration per specification
 //                    ZYXWVUTSRQPONMLKJIHGFEDCBA
-`define MISA 32'b0000000000101000001000100100101
+`define MISA 32'b0000000000101000001000100101101
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 671f7343..c064783c 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -95,12 +95,22 @@
 
 // largest length in IEU/FPU
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
 `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
 `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
 
+// division constants
+`define RADIX 4
+`define DIVCOPIES 4
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
+`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
+`define LOGR ((`RADIX==2) ? 1 : 2)
+`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES))
+`define DURLEN ($clog2($rtoi(`FPDUR)+1))
+`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES)
+
+
 `define USE_SRAM 0
 
 // Disable spurious Verilator warnings
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 9e7ba49b..58f782bd 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -11,7 +11,7 @@ add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/DivBusy
 add wave -noupdate /testbenchfp/srtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
@@ -21,8 +21,12 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
+add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
+add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
+add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
 add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 935ed3c1..a4f3feff 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -1,9 +1,9 @@
 `include "wally-config.vh"
 
 module divshiftcalc(
-    input logic  [`DIVLEN+2:0] Quot,
+    input logic  [`QLEN-1:0] Quot,
     input logic  [`FMTBITS-1:0] Fmt,
-    input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2,
+    input logic [`DURLEN-1:0] DivEarlyTermShift,
     input logic [`NE+1:0] DivCalcExp,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
@@ -32,9 +32,10 @@ module divshiftcalc(
     // inital Left shift amount  = NF
     assign NormShift = (`NE+2)'(`NF);
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};
+    // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
+    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)};
 
     // *** may be able to reduce shifter size
-    assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 086b97d8..c4f09aea 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -47,8 +47,8 @@ module divsqrt(
   output logic DivBusy,
   output logic DivDone,
   output logic [`NE+1:0] DivCalcExpM,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
-  output logic [`DIVLEN+2:0] QuotM
+  output logic [`DURLEN-1:0] EarlyTermShiftM,
+  output logic [`QLEN-1:0] QuotM
 //   output logic [`XLEN-1:0] RemM,
 );
 
@@ -57,12 +57,12 @@ module divsqrt(
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
   logic [`DIVLEN-1:0] Dpreproc;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
 
   srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 98250a45..4e16bc96 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -34,20 +34,20 @@ module flags(
     input logic                 XInf, YInf, ZInf,    // inputs are infinity
     input logic                 Plus1,
     input logic                 InfIn,                  // is a Inf input being used
+    input logic                 NaNIn,                  // is a NaN input being used
+    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic                 XZero, YZero,         // inputs are zero
     input logic                 XNaN, YNaN,           // inputs are NaN
-    input logic                 NaNIn,                  // is a NaN input being used
     input logic                 Sqrt,                   // Sqrt?
     input logic                 ToInt,                  // convert to integer
     input logic                 IntToFp,                // convert integer to floating point
     input logic                 Int64,                  // convert to 64 bit integer
     input logic                 Signed,                 // convert to a signed integer
-    input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic [`NE:0]         CvtCe,            // the calculated expoent - Cvt
     input logic                 CvtOp,                  // conversion opperation?
     input logic                 DivOp,                  // conversion opperation?
     input logic                 FmaOp,                  // Fma opperation?
-    input logic  [`NE+1:0]      FullResExp,             // Re with bits to determine sign and overflow
+    input logic  [`NE+1:0]      FullRe,             // Re with bits to determine sign and overflow
     input logic  [`NE+1:0]      Nexp,               // exponent of the normalized sum
     input logic  [1:0]          CvtNegResMsbs,             // the negitive integer result's most significant bits
     input logic                 FmaAs, FmaPs,        // the product and modified Z signs
@@ -73,30 +73,30 @@ module flags(
 
 
    if (`FPSIZES == 1) begin
-        assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 2) begin    
-        assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
+        assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
 
-        assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+        assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end else if (`FPSIZES == 3) begin
         always_comb
             case (OutFmt)
-                `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
-                `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
-                `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
+                `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+                `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+                `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
                 default: ResExpGteMax = 1'bx;
             endcase
-            assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
 
     end else if (`FPSIZES == 4) begin        
         always_comb
             case (OutFmt)
-                `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
-                `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
-                `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
-                `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
+                `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
+                `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
+                `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
+                `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
             endcase
             // a left shift of intlen+1 is still in range but any more than that is an overflow
             //           inital: |      64 0's         |    XLEN     |
@@ -110,14 +110,14 @@ module flags(
             //      - any of the bits after the most significan 1 is one
             //      - the most signifcant in 65 or 33 is still a one in the number and
             //        one of the later bits is one
-            assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
+            assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
     end
 
     //                 if the result is greater than or equal to the max exponent(not taking into account sign)
     //                 |           and the exponent isn't negitive
     //                 |           |                   if the input isnt infinity or NaN
     //                 |           |                   |            
-    assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero);
+    assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
 
     // detecting tininess after rounding
     //                  the exponent is negitive
@@ -127,7 +127,7 @@ module flags(
     //                  |                    |                    |                                      |                     and if the result is not exact
     //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
     //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
+    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
@@ -153,7 +153,7 @@ module flags(
     //                  |           |                                  |                    |               or the res rounds up out of bounds
     //                  |           |                                  |                    |                       and the res didn't underflow
     //                  |           |                                  |                    |                       |
-    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
+    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
     //                                                                                                     |
     //                                                                                                     or when the positive res rounds up out of range
     assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 5f55e17b..d4898e80 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -37,7 +37,7 @@ module fmashiftcalc(
     input logic                         FmaKillProd,  // is the product set to zero
     input logic 			            ZDenorm,
     output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
-    output logic                        FmaSmZero,    // is the result denormalized - calculated before LZA corection
+    output logic                        FmaSZero,    // is the sum zero
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
@@ -50,7 +50,7 @@ module fmashiftcalc(
     ///////////////////////////////////////////////////////////////////////////////
     //*** insert bias-bias simplification in fcvt.sv/phone pictures
     // Determine if the sum is zero
-    assign FmaSmZero = ~(|FmaSm);
+    assign FmaSZero = ~(|FmaSm);
 
     // calculate the sum's exponent
     assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
@@ -90,7 +90,7 @@ module fmashiftcalc(
         logic Sum0LEZ, Sum0GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
+        assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
 
     end else if (`FPSIZES == 2) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
@@ -98,7 +98,7 @@ module fmashiftcalc(
         assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
         assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
         assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
-        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero;
+        assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
 
     end else if (`FPSIZES == 3) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@@ -110,9 +110,9 @@ module fmashiftcalc(
         assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
+                `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
                 default: FmaPreResultDenorm = 1'bx;
             endcase
         end
@@ -129,10 +129,10 @@ module fmashiftcalc(
         assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
-                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero;
-                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero;
-                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero;
-                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero;
+                2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
+                2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
+                2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
+                2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
             endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
         end
 
@@ -144,7 +144,7 @@ module fmashiftcalc(
     //      - if kill prod dont add to exp
 
     // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero;
+    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 8336c39c..e1c9e5fa 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -125,12 +125,12 @@ module fpu (
    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`DIVLEN+2:0] QuotE, QuotM;
+   logic [`QLEN-1:0] QuotM;
    logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
    logic DivNegStickyE, DivNegStickyM;
    logic DivStickyE, DivStickyM;
    logic DivDoneM;
-   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;
+   logic [`DURLEN-1:0] EarlyTermShiftM;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -289,7 +289,7 @@ module fpu (
    divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
                   .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
-                  .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
+                  .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
    // other FP execution units
    fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
             .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@@ -381,12 +381,12 @@ module fpu (
 
    assign FpLoadStoreM = FResSelM[1];
 
-   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M),
+   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
                            .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
                            .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
                            .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
                            .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
-                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
+                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
    mux2  #(5)  FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv
index 03b36f4f..17db0c0b 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@@ -38,7 +38,7 @@ module lzacorrection(
     input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     input logic                     FmaKillProd,  // is the product set to zero
-    input logic                     FmaSmZero,
+    input logic                     FmaSZero,
     output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
     output logic [`NE+1:0]          DivCorrExp,
     output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
@@ -59,7 +59,7 @@ module lzacorrection(
     assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}};
+    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index e165e7e1..18452abd 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -54,12 +54,12 @@ module postprocess(
     input logic                             FmaInvA,      // do you invert Z
     input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
     //divide signals
-    input logic  [$clog2(`DIVLEN/2+3)-1:0]  DivEarlyTermShiftDiv2,
+    input logic  [`DURLEN-1:0]              DivEarlyTermShift,
     input logic                             DivSticky,
     input logic                             DivNegSticky,
     input logic                             DivDone,
     input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`DIVLEN+2:0]              Quot,
+    input logic  [`QLEN-1:0]                Quot,
     // conversion signals
     input logic                             CvtCs,     // the result's sign
     input logic  [`NE:0]                    CvtCe,    // the calculated expoent
@@ -69,7 +69,7 @@ module postprocess(
     input logic  [`CVTLEN-1:0]              CvtLzcIn,      // input to the Leading Zero Counter (priority encoder)
     input logic                             IntZero,         // is the input zero
     // final results
-    output logic [`FLEN-1:0]                W,    // FMA final result
+    output logic [`FLEN-1:0]                PostProcRes,    // FMA final result
     output logic [4:0]                      PostProcFlg,
     output logic [`XLEN-1:0]                FCvtIntRes    // the int conversion result
     );
@@ -81,7 +81,7 @@ module postprocess(
     logic Nsgn;
     logic [`NE+1:0] Nexp;
     logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
-    logic [`NE+1:0] FullResExp;  // Re with bits to determine sign and overflow
+    logic [`NE+1:0] FullRe;  // Re with bits to determine sign and overflow
     logic S;           // S bit
     logic UfPlus1;                    // do you add one (for determining underflow flag)
     logic R;   // bits needed to determine rounding
@@ -95,7 +95,7 @@ module postprocess(
     logic [`FMTBITS-1:0] OutFmt;
     // fma signals
     logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
-    logic FmaSmZero;        // is the sum zero
+    logic FmaSZero;        // is the sum zero
     logic [3*`NF+8:0] FmaShiftIn;        // shift input
     logic [`NE+1:0] FmaConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
@@ -153,8 +153,8 @@ module postprocess(
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
-                          .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+                          .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSel)
@@ -185,7 +185,7 @@ module postprocess(
 
     lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
                                 .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
-                                .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac);
+                                .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -204,14 +204,14 @@ module postprocess(
     round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
                 .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
                 .DivSticky, .DivNegSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
     ///////////////////////////////////////////////////////////////////////////////
 
     resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
-                          .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws);
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
@@ -220,7 +220,7 @@ module postprocess(
     flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                 .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
                 .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
-                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
+                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
                 .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
 
     ///////////////////////////////////////////////////////////////////////////////
@@ -228,10 +228,10 @@ module postprocess(
     ///////////////////////////////////////////////////////////////////////////////
 
     negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
-    resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+    specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
         .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
         .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
         .XInf, .YInf, .DivOp,
-        .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes);
+        .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes);
 
 endmodule
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
deleted file mode 100644
index 4389056f..00000000
--- a/pipelined/src/fpu/resultselect.sv
+++ /dev/null
@@ -1,290 +0,0 @@
-///////////////////////////////////////////
-//
-// Written: me@KatherineParry.com
-// Modified: 7/5/2022
-//
-// Purpose: special case selection
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module resultselect(
-    input logic                 Xs,        // input signs
-    input logic  [`NF:0]        Xm, Ym, Zm, // input mantissas
-    input logic                 XNaN, YNaN, ZNaN,    // inputs are NaN
-    input logic  [2:0]          Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic  [`FMTBITS-1:0] OutFmt,       // output format
-    input logic                 InfIn,
-    input logic                 XInf, YInf,
-    input logic                 XZero,
-    input logic                 IntZero,
-    input logic                 NaNIn,
-    input logic                 IntToFp,
-    input logic                 Int64,
-    input logic                 Signed,
-    input logic                 CvtOp,
-    input logic                 DivOp,
-    input logic                 FmaOp,
-    input logic                 Plus1,
-    input logic                 DivByZero,
-    input logic  [`NE:0]        CvtCe,    // the calculated expoent
-    input logic                 Ws,  // the res's sign
-    input logic                 IntInvalid, Invalid, Overflow,  // flags
-    input logic                 CvtResUf,
-    input logic  [`NE-1:0]      Re,          // Res exponent
-    input logic  [`NE+1:0]      FullResExp,          // Res exponent
-    input logic  [`NF-1:0]      Rf,         // Res fraction
-    input logic  [`XLEN+1:0]    CvtNegRes,     // the negation of the result
-    output logic [`FLEN-1:0]    W,     // final res
-    output logic [`XLEN-1:0]    FCvtIntRes     // final res
-);
-    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
-    logic OfResMax;
-    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
-    logic KillRes;
-    logic SelOfRes;
-
-
-    // does the overflow result output the maximum normalized floating point number
-    //                output infinity if the input is infinity
-    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws));
-
-    if (`FPSIZES == 1) begin
-
-        //NaN res selection depending on standard
-        if(`IEEE754) begin
-            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end else begin
-            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-        end
-
-        assign OfRes =  OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
-        assign NormRes = {Ws, Re, Rf};
-
-    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
-        if(`IEEE754) begin
-            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end else begin 
-            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-        end
-        
-        assign OfRes =  OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
-                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-        assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (OutFmt)
-                `FMT: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {Ws, Re, Rf};
-                end
-                `FMT1: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
-                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
-                    end
-                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
-                end
-                `FMT2: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
-                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
-                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
-                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)};
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
-                end
-                default: begin
-                    if(`IEEE754) begin
-                        XNaNRes = (`FLEN)'(0);
-                        YNaNRes = (`FLEN)'(0);
-                        ZNaNRes = (`FLEN)'(0);
-                        InvalidRes = (`FLEN)'(0);
-                    end else begin 
-                        InvalidRes = (`FLEN)'(0);
-                    end
-                    OfRes = (`FLEN)'(0);
-                    UfRes = (`FLEN)'(0);
-                    NormRes = (`FLEN)'(0);
-                end
-            endcase
-
-    end else if (`FPSIZES == 4) begin 
-        always_comb
-            case (OutFmt)
-                2'h3: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end else begin 
-                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
-                    end
-                    
-                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {Ws, Re, Rf};
-                end
-                2'h1: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
-                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
-                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
-                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
-                    end
-                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
-                end
-                2'h0: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
-                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
-                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
-                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
-                end
-                2'h2: begin  
-                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
-                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
-                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
-                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end else begin 
-                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
-                    end
-                    
-                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)};      
-	            // zero is exact fi dividing by infinity so don't add 1
-                    UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                    NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
-                end
-            endcase
-
-    end
-
-    
-
-
-
-    // determine if you shoould kill the res - Cvt
-    //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
-    //      - dont set to zero if fp input is zero but not using the fp input
-    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
-    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
-    // output infinity with result sign if divide by zero
-    if(`IEEE754) begin
-        assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
-                         YNaN&~CvtOp ? YNaNRes :
-                         ZNaN&FmaOp ? ZNaNRes :
-                         Invalid ? InvalidRes : 
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end else begin
-        assign W = NaNIn|Invalid ? InvalidRes :
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end
-
-    ///////////////////////////////////////////////////////////////////////////////////////
-    //
-    //      |||||||||||   |||     |||   |||||||||||||
-    //          |||       ||||||  |||        |||
-    //          |||       ||| ||| |||        |||
-    //          |||       |||  ||||||        |||
-    //      |||||||||||   |||     |||        |||
-    //
-    ///////////////////////////////////////////////////////////////////////////////////////        
-
-    // *** probably can optimize the negation
-    // select the overflow integer res
-    //      - negitive infinity and out of range negitive input
-    //                 |  int  |  long  |
-    //          signed | -2^31 | -2^63  |
-    //        unsigned |   0   |    0   |
-    //
-    //      - positive infinity and out of range positive input and NaNs
-    //                 |   int  |  long  |
-    //          signed | 2^31-1 | 2^63-1 |
-    //        unsigned | 2^32-1 | 2^64-1 |
-    //
-    //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
-
-
-    // select the integer output
-    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
-    //      - if the input underflows
-    //          - if rounding and signed opperation and negitive input, output -1
-    //          - otherwise output a rounded 0
-    //      - otherwise output the normal res (trmined and sign extended if nessisary)
-    assign FCvtIntRes = IntInvalid ?  OfIntRes :
-			            CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
-endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index 05c3b461..e6de0c18 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -35,7 +35,7 @@ module resultsign(
     input logic         InfIn,
     input logic         FmaOp,
     input logic [`NE+1:0] FmaSe,
-    input logic         FmaSmZero,
+    input logic         FmaSZero,
     input logic         Mult,
     input logic         R,
     input logic         S,
@@ -61,6 +61,6 @@ module resultsign(
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
     assign InfSgn = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn;
+    assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 4c185ff3..c73edc08 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -57,7 +57,7 @@ module round(
     input logic                     DivSticky,             // sticky bit
     input logic                     DivNegSticky,
     output logic                    UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]          FullResExp,      // Re with bits to determine sign and overflow
+    output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
     output logic [`NF-1:0]          Rf,         // Result fraction
     output logic [`NE-1:0]          Re,          // Result exponent
     output logic                    S,             // sticky bit
@@ -344,8 +344,8 @@ module round(
 
     // round the result
     //      - if the fraction overflows one should be added to the exponent
-    assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd;
-    assign Re = FullResExp[`NE-1:0];
+    assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd;
+    assign Re = FullRe[`NE-1:0];
 
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
index 741d4e83..1c7b9648 100644
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@@ -30,7 +30,7 @@
 
 `include "wally-config.vh"
 
-module srtradix4 (
+module srtradix4(
   input  logic clk,
   input  logic DivStart, 
   input  logic DivBusy, 
@@ -40,20 +40,29 @@ module srtradix4 (
   input logic [`DIVLEN-1:0] X,
   input logic [`DIVLEN-1:0] Dpreproc,
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [`DIVLEN+2:0] Quot,
+  output logic [`QLEN-1:0] Quot,
   output logic [`DIVLEN+3:0]  WSN, WCN,
-  output logic [`DIVLEN+3:0]  WS, WC,
+  output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
   output logic  [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
 );
 
-  logic [3:0]     q;
-  logic [`DIVLEN+3:0]  WSA;
-  logic [`DIVLEN+3:0]  WCA;
-  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
+
+ /* verilator lint_off UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WCA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WS[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WC[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
+ /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
   logic [`NE+1:0] DivCalcExp;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
+  logic [`QLEN-1:0] QMux, QMMux;
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -63,47 +72,43 @@ module srtradix4 (
   //  - otherwise load WSA into the flipflop
   //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
   //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
-  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
-  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
+  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
   flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
 
-  // Quotient Selection logic
-  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  // *** change this for radix 4 - generate w/ stine code
-  // q encoding:
-	// 1000 = +2
-	// 0100 = +1
-	// 0000 =  0
-	// 0010 = -1
-	// 0001 = -2
-  qsel4 qsel4(.D, .WS, .WC, .q);
 
-  // Divisor Selection logic
-  // *** radix 4 change to choose -2 to 2
+  // Divisor Selections
   // - choose the negitive version of what's being selected
   assign DBar = ~D;
   assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
   assign D2 = {D[`DIVLEN+2:0], 1'b0};
 
-  always_comb
-    case (q)
-      4'b1000: Dsel = DBar2;
-      4'b0100: Dsel = DBar;
-      4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
-      4'b0010: Dsel = D;
-      4'b0001: Dsel = D2;
-      default: Dsel = {`DIVLEN+4{1'bx}};
-    endcase
+  genvar i;
+  generate
+    for(i=0; i<`DIVCOPIES; i++) begin
+      divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, 
+      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
+      if(i<3) begin 
+        assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
+        assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
+        assign Q[i+1] = QNext[i];
+        assign QM[i+1] = QMNext[i];
+      end
+    end
+  endgenerate
 
-  // Partial Product Generation
-  //  WSA, WCA = WS + WC - qD
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
-  
-  //*** change for radix 4
-  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);
+  // if starting a new division set Q to 0 and QM to -1
+  mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
+  mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
+  flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage
+  flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
+
+  assign Quot = Q[0];
+  assign FirstWS = WS[0];
+  assign FirstWC = WC[0];
 
   expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
 
@@ -113,7 +118,50 @@ endmodule
 // Submodules //
 ////////////////
 
+ /* verilator lint_off UNOPTFLAT */
+module divinteration (  // one divider iteration: pick digit q, update WS/WC residual and Q/QM
+  input logic clk,       // only forwarded to otfc4 in this module
+  input logic DivStart,  // only forwarded to otfc4 in this module
+  input logic DivBusy,   // only forwarded to otfc4 in this module
+  input logic [`DIVLEN+3:0] D,  // divisor: drives qsel4 and is the Dsel choice for q = -1
+  input logic [`DIVLEN+3:0]  DBar, D2, DBar2,  // other Dsel choices; srtradix4 drives ~D, 2*D, ~(2*D)
+  input logic [`QLEN-1:0] Q, QM,  // quotient and quotient-minus-one entering this iteration
+  input logic [`DIVLEN+3:0]  WS, WC,  // residual entering this iteration (two words fed to the csa)
+  output logic [`QLEN-1:0] QNext, QMNext, // Q and QM produced by otfc4 for this iteration's digit
+  output logic [`DIVLEN+3:0]  WSA, WCA  // csa outputs: WS + WC - qD (not yet shifted)
+);
+ /* verilator lint_on UNOPTFLAT */
 
+  logic [`DIVLEN+3:0]  Dsel;  // divisor multiple chosen by q and fed to the csa
+  logic [3:0]     q;  // quotient digit from qsel4, encoded as listed below
+
+  // Quotient Selection logic
+  // Given the partial remainder, select a quotient digit of +2, +1, 0, -1, or -2
+  // q encoding:
+	// 1000 = +2
+	// 0100 = +1
+	// 0000 =  0
+	// 0010 = -1
+	// 0001 = -2
+  qsel4 qsel4(.D, .WS, .WC, .q);
+
+  always_comb  // choose the multiple of D with the opposite sign of q, so the csa subtracts qD
+    case (q)
+      4'b1000: Dsel = DBar2;  // q = +2
+      4'b0100: Dsel = DBar;  // q = +1
+      4'b0000: Dsel = {`DIVLEN+4{1'b0}};  // q = 0: nothing to subtract
+      4'b0010: Dsel = D;  // q = -1
+      4'b0001: Dsel = D2;  // q = -2
+      default: Dsel = {`DIVLEN+4{1'bx}};  // any other q value drives X onto Dsel
+    endcase
+
+  // Partial Product Generation
+  //  WSA, WCA = WS + WC - qD
+  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);  // |q[3:2] is 1 for q = +2/+1; presumably the +1 completing the negation of the inverted divisor - confirm against csa
+
+  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext);  // on-the-fly conversion: Q/QM -> QNext/QMNext
+
+endmodule
 
 module qsel4 (
 	input logic [`DIVLEN+3:0] D,
@@ -195,7 +243,8 @@ module otfc4 (
   input  logic         DivStart,
   input  logic         DivBusy,
   input  logic [3:0]   q,
-  output logic [`DIVLEN+2:0] Quot
+  input logic [`QLEN-1:0] Q, QM,
+  output logic [`QLEN-1:0] QNext, QMNext
 );
 
   //  The on-the-fly converter transfers the quotient 
@@ -207,16 +256,11 @@ module otfc4 (
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
+
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
-  logic [`DIVLEN:0] QR, QMR;
-  // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
-  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
-  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
+  logic [`QLEN-3:0] QR, QMR;
 
   // shift Q (quotent) and QM (quotent-1)
 		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
@@ -227,8 +271,8 @@ module otfc4 (
     // *** how does the 0 concatination numbers work?
 
   always_comb begin
-    QR  = Quot[`DIVLEN:0];
-    QMR = QM[`DIVLEN:0];     // Shift Q and QM
+    QR  = Q[`QLEN-3:0];
+    QMR = QM[`QLEN-3:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 008b234d..fc73cf71 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -40,8 +40,8 @@ module srtfsm(
   input  logic DivStart, 
   input logic StallE,
   input logic StallM,
-  input  logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
+  input  logic [`DURLEN-1:0] Dur,
+  output logic [`DURLEN-1:0] EarlyTermShiftE,
   output logic DivStickyE,
   output logic DivDone,
   output logic DivNegStickyE,
@@ -51,7 +51,7 @@ module srtfsm(
   typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
   statetype state;
 
-  logic [$clog2(`DIVLEN/2+3)-1:0] step;
+  logic [`DURLEN-1:0] step;
   logic WZero;
   //logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
   logic [`DIVLEN+3:0] W;
@@ -63,7 +63,7 @@ module srtfsm(
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
-  assign EarlyTermShiftDiv2E = step;
+  assign EarlyTermShiftE = step;
 
   always_ff @(posedge clk) begin
       if (reset) begin
@@ -73,7 +73,7 @@ module srtfsm(
           if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
           else         state <= #1 BUSY;
       end else if (state == BUSY) begin
-          if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
+          if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin
               state <= #1 DONE;
           end
           step <= step - 1;
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index d17d2abd..fa76c051 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -35,7 +35,7 @@ module srtpreproc (
   output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
+  output logic [`DURLEN-1:0] Dur
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
@@ -63,10 +63,20 @@ module srtpreproc (
   
   assign X = PreprocX;
   assign Dpreproc = PreprocY;
-
-  assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
+  
+  assign Dur = (`DURLEN)'($rtoi(`FPDUR));
   // assign intExp = zeroCntB - zeroCntA + 1;
   // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
+  // copies    radix 2         radix 4
+  // 1 copy    (DIVLEN+2)      (DIVLEN+2)/2
+  // 2 copies  (DIVLEN+2)/2    (DIVLEN+2)/(2*2)
+  // 4 copies  (DIVLEN+2)/4    (DIVLEN+2)/(2*4)
+  // 8 copies  (DIVLEN+2)/8    (DIVLEN+2)/(2*8)
+
+  // DIVRESLEN = DIVLEN or DIVLEN+2
+  // r = 1 (radix 2) or 2 (radix 4)
+  // each table entry is DIVRESLEN/(r*`DIVCOPIES)
+
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index fa46a060..ba14499e 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -80,9 +80,9 @@ module testbenchfp;
   logic CvtResSgnE;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`DIVLEN+2:0] Quot;
+	logic [`QLEN-1:0] Quot;
   logic CvtResDenormUfE;
-  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
+  logic [`DURLEN-1:0] EarlyTermShift;
   logic DivStart, DivBusy;
   logic reset = 1'b0;
   logic [`DIVLEN-1:0]    DivX;
@@ -90,7 +90,7 @@ module testbenchfp;
   logic [`DIVLEN+3:0]  WSN, WS;
   logic [`DIVLEN+3:0]  WCN, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DURLEN-1:0] Dur;
 
   // in-between FMA signals
   logic                 Mult;
@@ -686,8 +686,8 @@ module testbenchfp;
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
               .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
-              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-              .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes));
+              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   
   fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), 
             .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
@@ -697,8 +697,8 @@ module testbenchfp;
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
   srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
   srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
                 .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
 
   assign CmpFlg[3:0] = 0;

From 5c0ecfa4334c4c8c53800813625b52620e4d7c3c Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 11 Jul 2022 18:31:51 -0700
Subject: [PATCH 04/36] forgot a file

---
 pipelined/src/fpu/specialcase.sv | 290 +++++++++++++++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100644 pipelined/src/fpu/specialcase.sv

diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv
new file mode 100644
index 00000000..3c28eae2
--- /dev/null
+++ b/pipelined/src/fpu/specialcase.sv
@@ -0,0 +1,290 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module specialcase(
+    input logic                 Xs,        // input signs
+    input logic  [`NF:0]        Xm, Ym, Zm, // input mantissas
+    input logic                 XNaN, YNaN, ZNaN,    // inputs are NaN
+    input logic  [2:0]          Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [`FMTBITS-1:0] OutFmt,       // output format
+    input logic                 InfIn,
+    input logic                 NaNIn,
+    input logic                 XInf, YInf,
+    input logic                 XZero,
+    input logic                 IntZero,
+    input logic                 IntToFp,
+    input logic                 Int64,
+    input logic                 Signed,
+    input logic                 CvtOp,
+    input logic                 DivOp,
+    input logic                 FmaOp,
+    input logic                 Plus1,
+    input logic                 DivByZero,
+    input logic  [`NE:0]        CvtCe,    // the calculated expoent
+    input logic                 Ws,  // the res's sign
+    input logic                 IntInvalid, Invalid, Overflow,  // flags
+    input logic                 CvtResUf,
+    input logic  [`NE-1:0]      Re,          // Res exponent
+    input logic  [`NE+1:0]      FullRe,          // Res exponent
+    input logic  [`NF-1:0]      Rf,         // Res fraction
+    input logic  [`XLEN+1:0]    CvtNegRes,     // the negation of the result
+    output logic [`FLEN-1:0]    PostProcRes,     // final res
+    output logic [`XLEN-1:0]    FCvtIntRes     // final res
+);
+    logic [`FLEN-1:0]   XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results
+    logic OfResMax;
+    logic [`XLEN-1:0]       OfIntRes;   // the overflow result for integer output
+    logic KillRes;
+    logic SelOfRes;
+
+
+    // does the overflow result output the maximum normalized floating point number
+    //                output infinity if the input is infinity
+    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws));
+
+    if (`FPSIZES == 1) begin
+
+        //NaN res selection depending on standard
+        if(`IEEE754) begin
+            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end else begin
+            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end
+
+        assign OfRes =  OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
+        assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+        assign NormRes = {Ws, Re, Rf};
+
+    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
+        if(`IEEE754) begin
+            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end else begin 
+            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end
+        
+        assign OfRes =  OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
+                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+        assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (OutFmt)
+                `FMT: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
+                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {Ws, Re, Rf};
+                end
+                `FMT1: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
+                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
+                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)};
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
+                end
+                default: begin
+                    if(`IEEE754) begin
+                        XNaNRes = (`FLEN)'(0);
+                        YNaNRes = (`FLEN)'(0);
+                        ZNaNRes = (`FLEN)'(0);
+                        InvalidRes = (`FLEN)'(0);
+                    end else begin 
+                        InvalidRes = (`FLEN)'(0);
+                    end
+                    OfRes = (`FLEN)'(0);
+                    UfRes = (`FLEN)'(0);
+                    NormRes = (`FLEN)'(0);
+                end
+            endcase
+
+    end else if (`FPSIZES == 4) begin 
+        always_comb
+            case (OutFmt)
+                2'h3: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
+                    UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {Ws, Re, Rf};
+                end
+                2'h1: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
+                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
+                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end
+                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
+                end
+                2'h0: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
+                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
+                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
+                end
+                2'h2: begin  
+                    if(`IEEE754) begin
+                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
+                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
+                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end else begin 
+                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end
+                    
+                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+	            // zero is exact if dividing by infinity so don't add 1
+                    UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                    NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
+                end
+            endcase
+
+    end
+
+    
+
+
+
+    // determine if you should kill the res - Cvt
+    //      - do so if the res underflows, is zero (the exp doesn't calculate correctly), or the integer input is 0
+    //      - don't set to zero if fp input is zero but not using the fp input
+    //      - don't set to zero if int input is zero but not using the int input
+    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
+    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+    // output infinity with result sign if divide by zero
+    if(`IEEE754) begin
+        assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
+                         YNaN&~CvtOp ? YNaNRes :
+                         ZNaN&FmaOp ? ZNaNRes :
+                         Invalid ? InvalidRes : 
+                         SelOfRes ? OfRes :
+                         KillRes ? UfRes :  
+                         NormRes;
+    end else begin
+        assign PostProcRes = NaNIn|Invalid ? InvalidRes :
+                         SelOfRes ? OfRes :
+                         KillRes ? UfRes :  
+                         NormRes;
+    end
+
+    ///////////////////////////////////////////////////////////////////////////////////////
+    //
+    //      |||||||||||   |||     |||   |||||||||||||
+    //          |||       ||||||  |||        |||
+    //          |||       ||| ||| |||        |||
+    //          |||       |||  ||||||        |||
+    //      |||||||||||   |||     |||        |||
+    //
+    ///////////////////////////////////////////////////////////////////////////////////////        
+
+    // *** probably can optimize the negation
+    // select the overflow integer res
+    //      - negative infinity and out of range negative input
+    //                 |  int  |  long  |
+    //          signed | -2^31 | -2^63  |
+    //        unsigned |   0   |    0   |
+    //
+    //      - positive infinity and out of range positive input and NaNs
+    //                 |   int  |  long  |
+    //          signed | 2^31-1 | 2^63-1 |
+    //        unsigned | 2^32-1 | 2^64-1 |
+    //
+    //      other: 32 bit unsigned res should be sign extended as if it were a signed number
+    assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
+                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
+                               Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
+                                              {`XLEN{1'b1}};// unsigned positive
+
+
+    // select the integer output
+    //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
+    //      - if the input underflows
+    //          - if rounding and signed operation and negative input, output -1
+    //          - otherwise output a rounded 0
+    //      - otherwise output the normal res (trimmed and sign extended if necessary)
+    assign FCvtIntRes = IntInvalid ?  OfIntRes :
+			            CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
+                        Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
+endmodule
\ No newline at end of file

From 3483b92480a32d52a1e3d88137aee17a1d82d18e Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Tue, 12 Jul 2022 02:21:38 +0000
Subject: [PATCH 05/36] On the fly conversion for square root

---
 pipelined/srt/srt.sv | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 6e8cd560..7886af47 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -229,7 +229,7 @@ module otfc2 #(parameter N=64) (
   logic [N+1:0] QR, QMR;
 
   flopr #(N+3) Qreg(clk, Start, QNext, Q);
-  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
+  mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   always_comb begin
@@ -254,12 +254,36 @@ endmodule
 // Square Root OTFC, Radix 2 //
 ///////////////////////////////
 module softc2(
-  input  logic clk,
-  input  logic Start,
-  input  logic sp, sn,
-  output logic S,
+  input  logic         clk,
+  input  logic         Start,
+  input  logic         sp, sn,
+  input  logic [N+3:0] C,
+  output logic [N-1:0] Sq,
 );
 
+
+  //  The on-the-fly converter transfers the square root 
+  //  bits to the quotient as they come.
+  logic [N+2:0] S, SM, SNext, SMNext, SMMux;
+
+  flopr #(N+3) Sreg(clk, Start, SNext, S);
+  mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux);
+  flop #(`DIVLEN+3) SMreg(clk, SMMux, SM);
+
+  always_comb begin
+    if (sp) begin
+      SNext  = S | ((C << 2) & ~(C << 1));
+      SMNext = S;
+    end else if (sn) begin
+      SNext  = SM | ((C << 2) & ~(C << 1));
+      SMNext = SM;
+    end else begin        // If sp and sn are not true, then sz is
+      SNext  = S;
+      SMNext = SM | ((C << 2) & ~(C << 1));
+    end 
+  end
+  assign Sq = S[N+2] ? S[N+1:2] : S[N:1];
+
 endmodule
 /////////////
 // counter //

From 9d4acc9ddbb16e3471e6c59baf573c113b790cc5 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Tue, 12 Jul 2022 22:18:56 +0000
Subject: [PATCH 06/36] C register and other various fixes

---
 pipelined/srt/srt.sv | 54 +++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 7886af47..7e6d22a4 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -54,13 +54,13 @@ module srt (
   output logic [3:0] Flags
 );
 
-  logic           qp, qz, qm; // quotient is +1, 0, or -1
-  logic [`NE-1:0] calcExp;
-  logic           calcSign;
-  logic [`DIVLEN+3:0]  X, Dpreproc;
-  logic [`DIVLEN+3:0]  WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
+  logic                       qp, qz, qm; // quotient is +1, 0, or -1
+  logic [`NE-1:0]             calcExp;
+  logic                       calcSign;
+  logic [`DIVLEN+3:0]         X, Dpreproc, C;
+  logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
-  logic           intSign;
+  logic                       intSign;
  
   srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
 
@@ -91,7 +91,11 @@ module srt (
   // Partial Product Generation
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
   
-  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+  // If only implementing division, use divide otfc
+  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+  // otherwise use sotfc
+  creg              sotfcC(clk, Start, C);
+  sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot);
 
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
@@ -138,9 +142,9 @@ module srtpreproc (
   assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
 
   assign DivX = Int ? PreprocA : PreprocX;
-  assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac};
+  assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
 
-  assign X = Sqrt ? SqrtX : {4'b0001, DivX};
+  assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX};
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
   assign intExp = zeroCntB - zeroCntA + 1;
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
@@ -253,22 +257,22 @@ endmodule
 ///////////////////////////////
 // Square Root OTFC, Radix 2 //
 ///////////////////////////////
-module softc2(
+module sotfc2(
   input  logic         clk,
   input  logic         Start,
   input  logic         sp, sn,
-  input  logic [N+3:0] C,
-  output logic [N-1:0] Sq,
+  input  logic [`DIVLEN+3:0] C,
+  output logic [`DIVLEN-1:0] Sq,
 );
 
 
   //  The on-the-fly converter transfers the square root 
   //  bits to the quotient as they come.
-  logic [N+2:0] S, SM, SNext, SMNext, SMMux;
+  logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
 
-  flopr #(N+3) Sreg(clk, Start, SNext, S);
-  mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux);
-  flop #(`DIVLEN+3) SMreg(clk, SMMux, SM);
+  flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM);
+  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux);
+  flop #(`DIVLEN+4) SMreg(clk, SMux, M);
 
   always_comb begin
     if (sp) begin
@@ -282,9 +286,23 @@ module softc2(
       SMNext = SM | ((C << 2) & ~(C << 1));
     end 
   end
-  assign Sq = S[N+2] ? S[N+1:2] : S[N:1];
+  assign Sq = S[`DIVLEN-1:0];
 
 endmodule
+
+//////////////////////////
+// C Register for SOTFC //
+//////////////////////////
+module creg(input  logic clk,
+            input  logic Start,
+            output logic [`DIVLEN+3:0] C
+);
+  logic [`DIVLEN+3:0] CMux;
+
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux);
+  flop #(`DIVLEN+4) cflop(clk, CMux, C);
+endmodule
+
 /////////////
 // counter //
 /////////////
@@ -293,7 +311,7 @@ module counter(input  logic clk,
                input  logic [$clog2(`XLEN+1)-1:0] dur,
                output logic done);
  
-   logic    [$clog2(`XLEN+1)-1:0]  count;
+  logic    [$clog2(`XLEN+1)-1:0]  count;
 
   // This block of control logic sequences the divider
   // through its iterations.  You may modify it if you

From 452b017f9a87751624b0d584ca62686771dd857e Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 12 Jul 2022 22:42:19 +0000
Subject: [PATCH 07/36] found the bug in the store modification

---
 pipelined/src/lsu/lsu.sv                  | 4 ++--
 tests/riscof/spike/spike_rv32imc_isa.yaml | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv
index 50ecdb18..e9f41e65 100644
--- a/pipelined/src/lsu/lsu.sv
+++ b/pipelined/src/lsu/lsu.sv
@@ -235,8 +235,8 @@ module lsu (
       .s(SelUncachedAdr), .y(LSUBusHWDATA));
     
     if(CACHE_ENABLED) begin : dcache
-      if (`LLEN>`FLEN)
-        mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
+      if (`LLEN>`XLEN)
+        mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM);
       else
         assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM};
       cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml
index 5a76fd97..04a5da18 100644
--- a/tests/riscof/spike/spike_rv32imc_isa.yaml
+++ b/tests/riscof/spike/spike_rv32imc_isa.yaml
@@ -1,11 +1,11 @@
 hart_ids: [0]
 hart0:
-  ISA: RV32IMAFCZicsr_Zifencei
+  ISA: RV32IMAFDCZicsr_Zifencei
   physical_addr_sz: 32
   User_Spec_Version: '2.3'
   supported_xlen: [32]
   misa:
-   reset-val: 0x40001125
+   reset-val: 0x4000112D
    rv32:
      accessible: true
      mxl:
@@ -23,6 +23,6 @@ hart0:
            warl:
               dependency_fields: []
               legal:
-                - extensions[25:0] bitmask [0x0001125, 0x0000000]
+                - extensions[25:0] bitmask [0x000112D, 0x0000000]
               wr_illegal:
                 - Unchanged
\ No newline at end of file

From ed9106128fe828be813b7f3d2b81e4ab86a869ba Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Tue, 12 Jul 2022 22:45:54 +0000
Subject: [PATCH 08/36] Square root implemented

---
 pipelined/srt/srt.sv       | 8 ++++----
 pipelined/srt/testbench.sv | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 7e6d22a4..1037c9e2 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -92,10 +92,10 @@ module srt (
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
   
   // If only implementing division, use divide otfc
-  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
   // otherwise use sotfc
-  creg              sotfcC(clk, Start, C);
-  sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot);
+  // creg              sotfcC(clk, Start, C);
+  // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot);
 
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
@@ -272,7 +272,7 @@ module sotfc2(
 
   flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM);
   mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux);
-  flop #(`DIVLEN+4) SMreg(clk, SMux, M);
+  flop #(`DIVLEN+4) SMreg(clk, SMux, S);
 
   always_comb begin
     if (sp) begin
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 83f33707..1f6f1561 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -82,7 +82,7 @@ module testbench;
 
   // Equip Int test or Sqrt test
   assign Int = 1'b0;
-  assign Sqrt = 1'b1;
+  assign Sqrt = 1'b0;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -111,7 +111,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("sqrttestvectors", Tests);
+      $readmemh ("testvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;

From 5c9f0115610bf4f37c5282a8e733f1f66a0b2917 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Tue, 12 Jul 2022 23:04:33 +0000
Subject: [PATCH 09/36] little fix

---
 pipelined/srt/srt.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 1037c9e2..b3de448f 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -144,7 +144,7 @@ module srtpreproc (
   assign DivX = Int ? PreprocA : PreprocX;
   assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
 
-  assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX};
+  assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
   assign intExp = zeroCntB - zeroCntA + 1;
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);

From e05b2a07d2dada0135a6f33c62962fa65576cefd Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 12 Jul 2022 18:32:17 -0700
Subject: [PATCH 10/36] removed warnings and took a mux out of the critical
 path

---
 pipelined/config/rv64fp/wally-config.vh |  2 +-
 pipelined/config/shared/wally-shared.vh | 13 +++++++------
 pipelined/regression/wave-fpu.do        |  3 ---
 pipelined/src/fpu/divsqrt.sv            |  6 +++---
 pipelined/src/fpu/fmashiftcalc.sv       |  3 ++-
 pipelined/src/fpu/postprocess.sv        |  2 +-
 pipelined/src/fpu/srt-radix4.sv         | 26 +++++++++++++------------
 pipelined/src/fpu/srtfsm.sv             |  6 +++---
 pipelined/src/fpu/srtpreproc.sv         |  3 +--
 pipelined/src/generic/lzc.sv            |  2 +-
 pipelined/testbench/testbench-fp.sv     |  8 ++++----
 synthDC/scripts/synth.tcl               |  2 +-
 12 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index cc8d1b2b..8f13b2e3 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -32,7 +32,7 @@
 `define DESIGN_COMPILER 0
 
 // RV32 or RV64: XLEN = 32 or 64
-`define XLEN 32
+`define XLEN 64
 
 // IEEE 754 compliance
 `define IEEE754 0
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index c064783c..54fa7a9b 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -101,14 +101,15 @@
 `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
 
 // division constants
-`define RADIX 4
-`define DIVCOPIES 4
+`define RADIX 32'h4
+`define DIVCOPIES 32'h4
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
 `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
-`define LOGR ((`RADIX==2) ? 1 : 2)
-`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES))
-`define DURLEN ($clog2($rtoi(`FPDUR)+1))
-`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES)
+`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
+// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
+`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
+`define DURLEN ($clog2(`FPDUR+1))
+`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 
 
 `define USE_SRAM 0
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 58f782bd..9a3d7e06 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
-add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
-add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
-add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
 add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index c4f09aea..8420baa1 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -52,7 +52,7 @@ module divsqrt(
 //   output logic [`XLEN-1:0] RemM,
 );
 
-  logic [`DIVLEN+3:0]  WSN, WCN;
+  logic [`DIVLEN+3:0]  NextWSN, NextWCN;
   logic [`DIVLEN+3:0]  WS, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
@@ -61,8 +61,8 @@ module divsqrt(
 
   srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
                 .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index d4898e80..3c286b50 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -53,7 +53,8 @@ module fmashiftcalc(
     assign FmaSZero = ~(|FmaSm);
 
     // calculate the sum's exponent
-    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
+    //                                                                      ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
+    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 18452abd..30945532 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -29,7 +29,7 @@
 
 `include "wally-config.vh"
 
-module postprocess(
+module postprocess (
     // general signals
     input logic                             Xs, Ys,  // input signs
     input logic  [`NE-1:0]                  Ze, // input exponents
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
index 1c7b9648..5a7e96e2 100644
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@@ -41,7 +41,7 @@ module srtradix4(
   input logic [`DIVLEN-1:0] Dpreproc,
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [`QLEN-1:0] Quot,
-  output logic [`DIVLEN+3:0]  WSN, WCN,
+  output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
   output logic  [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
@@ -58,11 +58,12 @@ module srtradix4(
   logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
   logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
  /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSN, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
   logic [`NE+1:0] DivCalcExp;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
-  logic [`QLEN-1:0] QMux, QMMux;
+  logic [`QLEN-1:0] QMMux;
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -72,9 +73,11 @@ module srtradix4(
   //  - otherwise load WSA into the flipflop
   //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
   //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
   flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
@@ -88,10 +91,10 @@ module srtradix4(
 
   genvar i;
   generate
-    for(i=0; i<`DIVCOPIES; i++) begin
+    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
       divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, 
       .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
-      if(i<3) begin 
+      if(i<(`DIVCOPIES-1)) begin 
         assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
         assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
         assign Q[i+1] = QNext[i];
@@ -101,9 +104,8 @@ module srtradix4(
   endgenerate
 
   // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
   mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
-  flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage
+  flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
   flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
 
   assign Quot = Q[0];
@@ -181,7 +183,7 @@ module qsel4 (
 
 	logic [3:0] QSel4[1023:0];
 
-  initial begin 
+  always_comb begin 
     integer d, w, i, w2;
     for(d=0; d<8; d++)
       for(w=0; w<128; w++)begin
@@ -270,9 +272,9 @@ module otfc4 (
 		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
     // *** how does the 0 concatination numbers work?
 
+  assign QR  = Q[`QLEN-3:0];
+  assign QMR = QM[`QLEN-3:0];     // Shifted Q and QM
   always_comb begin
-    QR  = Q[`QLEN-3:0];
-    QMR = QM[`QLEN-3:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
@@ -352,5 +354,5 @@ module expcalc(
             endcase
     end
     // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
     endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index fc73cf71..21e35c36 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -33,7 +33,7 @@
 module srtfsm(
   input  logic clk, 
   input  logic reset, 
-  input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
+  input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
   input  logic XInfE, YInfE, 
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
@@ -58,8 +58,8 @@ module srtfsm(
 
   //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
   assign DivBusy = (state == BUSY);
-  assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
-  assign DivStickyE = ~WZero;
+  assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
+  assign DivStickyE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index fa76c051..7386332f 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -63,8 +63,7 @@ module srtpreproc (
   
   assign X = PreprocX;
   assign Dpreproc = PreprocY;
-  
-  assign Dur = (`DURLEN)'($rtoi(`FPDUR));
+  assign Dur = (`DURLEN)'(`FPDUR);
   // assign intExp = zeroCntB - zeroCntA + 1;
   // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv
index 9f6e5981..71aabbc6 100644
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
 /* verilator lint_off CMPCONST */
 /* verilator lint_off WIDTH */
     
-    int i;
+    logic [31:0] i;
     always_comb begin
         i = 0;
         while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1;  // search for leading one
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index ba14499e..2aec1ab1 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -87,8 +87,8 @@ module testbenchfp;
   logic reset = 1'b0;
   logic [`DIVLEN-1:0]    DivX;
   logic [`DIVLEN-1:0]  Dpreproc;
-  logic [`DIVLEN+3:0]  WSN, WS;
-  logic [`DIVLEN+3:0]  WCN, WC;
+  logic [`DIVLEN+3:0]  NextWSN, WS;
+  logic [`DIVLEN+3:0]  NextWCN, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DURLEN-1:0] Dur;
 
@@ -696,9 +696,9 @@ module testbenchfp;
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
   srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
                 .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
                 .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
 
   assign CmpFlg[3:0] = 0;
diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index 9f2b4647..251522dc 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }
 

From 26e39dd32584b163dbc2d2cfea1acc9d04fe8658 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 13 Jul 2022 09:41:35 -0700
Subject: [PATCH 11/36] removed the +1 in the cvt

---
 pipelined/src/fpu/fcvt.sv | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 2d9fc21c..4820cf28 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -68,7 +68,8 @@ module fcvt (
     logic                   Signed;     // is the opperation with a signed integer?
     logic                   Int64;      // is the integer 64 bits?
     logic                   IntToFp;       // is the opperation an int->fp conversion?
-    logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
+    logic [`CVTLEN:0]       LzcInFull;      // input to the Leading Zero Counter (priority encoder)
+    logic [`LOGCVTLEN-1:0]  LeadingZeros; // output from the LZC
 
 
     // seperate OpCtrl for code readability
@@ -102,10 +103,11 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
-                             {Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
+    assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+                             {Xm, {`CVTLEN-`NF{1'b0}}};
+    assign LzcIn = LzcInFull[`CVTLEN-1:0];
     
-    lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros));
+    lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
 
     ///////////////////////////////////////////////////////////////////////////
     // shifter
@@ -119,13 +121,13 @@ module fcvt (
     //      denormalized/undeflowed result fp -> fp:
     //          - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0
     //      ??? -> fp: 
-    //          - shift left by LeadingZeros+1 - to shift till the result is normalized
+    //          - shift left by LeadingZeros - to shift till the result is normalized
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
     assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
                     ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}};
+                              (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}};
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -197,14 +199,14 @@ module fcvt (
     //                  |  0's |     Mantissa      |      0's if nessisary     |
     //                  |     keep        |
     //
-    //              - if the input is denormalized then we dont shift... so the  "- (LeadingZeros+1)" is just leftovers from other options
-    //      int -> fp : largest bias +  XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros
+    //              - if the input is denormalized then we dont shift... so the  "- LeadingZeros" is just leftovers from other options
+    //      int -> fp : largest bias +  XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros
     //              Process:
     //                  - shifted right by XLEN (XLEN)
-    //                  - shift left to normilize (-1-LeadingZeros)
+    //                  - shift left to normalize (-LeadingZeros)
     //                  - newBias to make the biased exponent
-    //          oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp)
-    assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
+    //          oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp)
+    assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})};
     // find if the result is dnormal or underflows
     //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
     //      - can't underflow an integer to Fp conversion

From 31db938e7eaa4ca3dfc369f60431a1494f6fdf6d Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 17:47:27 +0000
Subject: [PATCH 12/36] Added adder input selection to the on-the-fly converter

---
 pipelined/srt/srt.sv | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index b3de448f..f04aa718 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -87,6 +87,7 @@ module srt (
   // Divisor Selection logic
   assign Db = ~D;
   mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
+  fsel2 fsel(qp, qn, )
 
   // Partial Product Generation
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
@@ -95,7 +96,7 @@ module srt (
   otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
   // otherwise use sotfc
   // creg              sotfcC(clk, Start, C);
-  // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot);
+  // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F);
 
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
@@ -263,6 +264,7 @@ module sotfc2(
   input  logic         sp, sn,
   input  logic [`DIVLEN+3:0] C,
   output logic [`DIVLEN-1:0] Sq,
+  output logic [`DIVLEN+3:0] F,
 );
 
 
@@ -288,6 +290,8 @@ module sotfc2(
   end
   assign Sq = S[`DIVLEN-1:0];
 
+  fsel2 fsel(sp, sn, C, S, SM, F);
+
 endmodule
 
 //////////////////////////

From 110b762b554adfe5a2964404a776d73383d04b49 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 17:56:23 +0000
Subject: [PATCH 13/36] Finalized sqrt, ready for debugging

---
 pipelined/srt/srt.sv | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index f04aa718..23869225 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -57,7 +57,7 @@ module srt (
   logic                       qp, qz, qm; // quotient is +1, 0, or -1
   logic [`NE-1:0]             calcExp;
   logic                       calcSign;
-  logic [`DIVLEN+3:0]         X, Dpreproc, C;
+  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, AddIn;
   logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
   logic                       intSign;
@@ -87,17 +87,19 @@ module srt (
   // Divisor Selection logic
   assign Db = ~D;
   mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
-  fsel2 fsel(qp, qn, )
+
+  // If only implementing division, use divide otfc
+  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+  // otherwise use sotfc
+  creg              sotfcC(clk, Start, C);
+  sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F);
+
+  // Adder input selection
+  assign AddIn = Sqrt ? F : Dsel;
 
   // Partial Product Generation
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
+  csa    #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA);
   
-  // If only implementing division, use divide otfc
-  otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
-  // otherwise use sotfc
-  // creg              sotfcC(clk, Start, C);
-  // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F);
-
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
   signcalc signcalc(.XSign, .YSign, .calcSign);

From 11bb3f0a3eee60dc8329e9f28c1840a537c1e80a Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 18:11:13 +0000
Subject: [PATCH 14/36] Test generation files in common format

---
 pipelined/srt/exptestgen.c  |   4 ++++
 pipelined/srt/sqrttestgen   | Bin 22792 -> 22792 bytes
 pipelined/srt/sqrttestgen.c |  17 +++++++++++------
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c
index 61fe74aa..d6bebb77 100644
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp,
   // Print r in standard double format
   fprintf(fptr, "%03x", rExp|(rSign<<11));
   printhex(fptr, rFrac);
+  fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
 }
 
diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen
index dadc5dc5c8d36055ee1c68c8296279c302aa4efb..06615165395cb6abb37c9d60152d5fe70f15e11a 100755
GIT binary patch
delta 1496
zcmZwFUu+ab7y$5@y+8D>dwYBQyH=zeO}P{r=pj&o!QPT~+YM1tlqL57$b-gGeJCc{
zKA<fYnl_Dc`iZtAMiN2_jgf|Zuqs|_NVtO+j)VYeqKyVU5^I_!wujcP-`svzA#rlI
zv%i^d=9`(FzRSGtGEZiGtszz@t?z#~9?AqAWP)1ZB?A}v56i8&_|6Jfd}!Uw{vEM*
zo-ukqpW12-N{2ipnxZiy{cRQ&mfY?7$-~%8FmzJ0ZF?rDdhU`9@=bCJIZaNG$H|Au
zUyxhLm&onpVREPAHqWO{gWfH&pCR-KoF*g%l7vBl0m4~<Ucv=|<AhOxV}vn*4oLaR
zx_)#!+Z|hQw!7a6IXOYzL(Y(!$XRkd`5w8BJW1X}&XJ?!X~$ij3Tntxz)CWs^HInp
zWG`7IFVF_c$g|`K`44h6`B!oboz~^N?P4s|iZ@`q4JQ57yc>L_2l*X%rF5edal=QY
zZ}9UV%d5MF@h%wydfw>2HvS^M=PLiUZPD<RxMZmjaf$lVV*<G_buGru{$<+(=ClY;
zoqsfP3On#?=#$RW>70h+A4~H!Mqoqq|MHAFeT6moMdDNNru-Uz9)6K`SB*ZJ=lh4Y
z-J{RDd)}Jg^7e<x>sL*q|9-9t_6DLn3EhFRuv?8V(|C8Is9$(?{e7e7_mbRP9xeqw
z^TjsTb+UDxjKKlr_40q>W}*BUKQkoC!}ycwqxhSL-;~p96LYpbh-)d!+jhIy4Av*G
zUch<+>oxF6FzhSA&#iIBI^mmO&4#l$rWX5H;}TWHDu(|n*oNP_P^$3Ri2t!!SgJ;O
z9qdroEZdHUJSDn!DQd6O5b#PlcvXFlPr#RIGtXH(^sx-ns}0Fetfwp9Tj4YCKtySQ
zTU#3Z*?=Qmu`0IS;EUd-z(~yox!Ao82SInus-Pr>3W=gSLH`kh+AD8c2ezN^`t)KD
z=2`W)pt`=w*Fya!+=?%QNxs}VR=<;T1G&N#WC^UpA80GxAr?_BN!#S&S=ihVmN4n=
zhC7J0jp3(?sSC@&2giHOq`=||i|r9=$8EMXdO3dyj_Mse0}u3CNmJm7ragQLjx@#b
zlwhPOReqQjR~#SpG8STDRmYpdn0u)CRQYXLD|Fwapbm^#$^z`1waXZm`OENNU#-7*
m+Bk2eSr<ysya~k_)DB0?TDWJbaKViEBdRkaZun<2>iQRngYvTg

delta 1453
zcmYk+Uu;uV7y$5dZr9m%_x9fFI@?ZRby*g0VMAt|@z2`1b-kG)CUVsn6nz2~6E7xa
zJWN(Ige}Bv_&FRt5O`5p)DWs>zH}H8I(@*A5Y&*EzEryeO+!*CI(Pg|@7J-D_I~$w
z&Uel|=iJjyT;LNI_{_53Fj=)2Cw^Tt9o>baC#p9NoZ~k_W#<3x+OhUvHu24e9~XOu
z%Aqe$XXcdGyuOl_U{>z?G_2k8^Vl&jW4Cnk{e%$WmTq{*$QF5o{5<(B^2_9X<k!i2
z$@|GKksb0b@*&s5-jrLBOv|S~O_-2)iZCPb7-3GLn{Y;=lkmC3R>FCSHo}5L3uOI~
zv7>eFau?Rz<*t!0lK&<zk^dqulYb}wLjH}MCtoKQ$ydlF*JIwxR8gUXESb^%d`tF`
zzakstPstJTX>yEwlDvgHMc$6iCDn@XFqVx36Kt^=@_{XUJNO%3<%eNk!!~8f11B5a
z;Wnu1rk%U+ELq2s6>IYH;$FNjw*D)`al0hzvNjH2P39Klc`HV-0e3i=chhsl(1xx7
znehi)QQzeA@Pszh_R7YtJN^-3Jh@so{_bk`;g4p1xMW+Ch2kij)7t%=ddm?xK7(Z~
z65egZm~DNqO~x0V-CD55Z~BU<J0P?#_#yZ@_;&QqxLqxU_$Q}iS&2W8JBXi~5Z7n>
z&fgYd4hM58LO2-DV3fnZct?nf7)Mrx$YWIh6M}KZZeWaIH1ItoFs7l&=-?ITGaBy;
zVP8C2)zXI70sI=bVc2NrQ*gp)X`IF#nU&3bLF1^>uhlCvaLL%om%ubr-V#2_n|Z^W
zR>3j%KxvbuRJ6?Fo2Q!m75r&d3ta8)52Q(DEwlNNANjfq8q9Y*q}G}Xm<H*N`*lTj
zm6E0GCfy|_Ijz?+dwP!4tL611tc`U?0(UDQ*xT!8_>yGt!qs>q<asDF+WRDru1~Mp
z=w3WbXFf~|H`o4uHZ(F1Gue`!Q4*aVo`#3}!wSx8sQ)_R!GZ9O+U#mwcpcs;+WI1D
z^)af)ofsad=Ujz@$r0|rO0r8SY0y0Q9G`^)gK^wQm><kW4Vr(ge|tT{iO7k*mkQ&v
zPo<`#qiUks{1YXOV9vcvW6xyztj|>51S`9{0<{(6xUKf1rjN$;1>H{j;eg%cuPxkx
Pvvv>6*+!;l$2|W7ZMD|(

diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index b4ece147..710fc32f 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -33,11 +33,7 @@ void main(void)
   double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
 			  1.1, 1.2, 1.01, 1.001, 1.0001,
-<<<<<<< Updated upstream
-			  1/1.1, 1/1.5, 1/1.25, 1/1.125};
-=======
 			  2/1.1, 2/1.5, 2/1.25, 2/1.125};
->>>>>>> Stashed changes
   double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
         11, 12, 13, 14, 15, 16};
   int i;
@@ -69,14 +65,23 @@ void main(void)
 
 void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
 {
+  // Print a in standard double format
   fprintf(fptr, "%03x", aExp);
   printhex(fptr, aFrac);
   fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
+  fprintf(fptr, "_");
+
+  // Print r in standard double format
   fprintf(fptr, "%03x", rExp);
   printhex(fptr, rFrac);
+  fprintf(fptr, "_");
+
+  // Spacing for testbench, value doesn't matter
+  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
-
-
 }
 
 void printhex(FILE *fptr, double m)

From 81f396f885d4f9ec62f7d4b6e2bc27f43e6df5a7 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 18:30:18 +0000
Subject: [PATCH 15/36] Testbench accepts standard test vector files

---
 pipelined/srt/testbench.sv | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 1f6f1561..537fbb3e 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -55,21 +55,11 @@ module testbench;
   parameter MEM_SIZE = 40000;
   parameter MEM_WIDTH = 64+64+64+64;
  
-  // INT TEST SIZES
-  // `define memrem  63:0 
-  // `define memr  127:64
-  // `define memb  191:128
-  // `define mema  255:192
-
-  // FLOAT TEST SIZES
-  // `define memr  63:0 
-  // `define memb  127:64
-  // `define mema  191:128
-
-  // SQRT TEST SIZES 
-  `define memr  63:0 
-  `define mema  127:64
+  // Test sizes
+  `define memrem  63:0 
+  `define memr  127:64
   `define memb  191:128
+  `define mema  255:192
 
   // Test logicisters
   logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file

From 9b7e63f48227fa5b28228b50d3b7653dc99448b9 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 19:34:04 +0000
Subject: [PATCH 16/36] Lint error fixed and added comments to preprocessing

---
 pipelined/srt/srt.sv       | 62 +++++++++++++++++++++++---------------
 pipelined/srt/testbench.sv |  2 +-
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 23869225..3e41c16c 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -54,13 +54,14 @@ module srt (
   output logic [3:0] Flags
 );
 
-  logic                       qp, qz, qm; // quotient is +1, 0, or -1
+  logic                       qp, qz, qn; // quotient is +1, 0, or -1
   logic [`NE-1:0]             calcExp;
   logic                       calcSign;
   logic [`DIVLEN+3:0]         X, Dpreproc, C, F, AddIn;
   logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
   logic                       intSign;
+  logic                       cin;
  
   srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
 
@@ -76,29 +77,30 @@ module srt (
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm);
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn);
 
   flopen #(`NE) expflop(clk, Start, calcExp, rExp);
   flopen #(1) signflop(clk, Start, calcSign, rsign);
   flopen #(7) durflop(clk, Start, calcDur, dur);
   
-  counter divcounter(clk, Start, dur, done);
+  srtcounter divcounter(clk, Start, dur, done);
 
   // Divisor Selection logic
   assign Db = ~D;
-  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel);
+  mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel);
 
   // If only implementing division, use divide otfc
-  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot);
+  // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
   // otherwise use sotfc
-  creg              sotfcC(clk, Start, C);
-  sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F);
+  creg   sotfcC(clk, Start, C);
+  sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
 
   // Adder input selection
   assign AddIn = Sqrt ? F : Dsel;
 
   // Partial Product Generation
-  csa    #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA);
+  assign cin = ~Sqrt & qp;
+  csa    #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA);
   
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
@@ -128,30 +130,40 @@ module srtpreproc (
 
   logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX;
+  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
+  logic  [`NF+4:0] SqrtX;
 
+  // Generate positive integer inputs if they are signed
   assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
   assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
 
+  // Calculate leading zeros of integer inputs
   lzc #(`XLEN) lzcA (PosA, zeroCntA);
   lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
+  // Make integers have DIVLEN bits
   assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}};
   assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}};
 
+  // Shift integers to have leading ones
   assign PreprocA = ExtraA << (zeroCntA + 1);
   assign PreprocB = ExtraB << (zeroCntB + 1);
+
+  // Make mantissas have DIVLEN bits
   assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}};
   assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}};
 
+  // Selecting correct divider inputs
   assign DivX = Int ? PreprocA : PreprocX;
   assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
-
   assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
+
+  // Integer exponent and sign calculations
   assign intExp = zeroCntB - zeroCntA + 1;
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
+  // Number of cycles of divider
   assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
 endmodule
 
@@ -160,7 +172,7 @@ endmodule
 /////////////////////////////////
 module qsel2 ( // *** eventually just change to 4 bits
   input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
-  output logic         qp, qz, qm
+  output logic         qp, qz, qn
 );
  
   logic [`DIVLEN+3:`DIVLEN]  p, g;
@@ -187,7 +199,7 @@ module qsel2 ( // *** eventually just change to 4 bits
   // Produce quotient = +1, 0, or -1
   assign #1 qp = magnitude & ~sign;
   assign #1 qz = ~magnitude;
-  assign #1 qm = magnitude & sign;
+  assign #1 qn = magnitude & sign;
 endmodule
 
 ////////////////////////////////////
@@ -198,15 +210,16 @@ module fsel2 (
   input  logic [`DIVLEN+3:0] C, S, SM,
   output logic [`DIVLEN+3:0] F
 );
-  logic [`DIVLEN+3:0] FP, FN;
+  logic [`DIVLEN+3:0] FP, FN, FZ;
   
   // Generate for both positive and negative bits
   assign FP = ~S & C;
   assign FN = SM | (C & (~C << 2));
+  assign FZ = {(`DIVLEN+4){1'B0}};
 
   // Choose which adder input will be used
 
-  assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0});
+  assign F = sp ? FP : (sn ? FN : FZ);
 
 endmodule
 
@@ -216,7 +229,7 @@ endmodule
 module otfc2 #(parameter N=64) (
   input  logic         clk,
   input  logic         Start,
-  input  logic         qp, qz, qm,
+  input  logic         qp, qz, qn,
   output logic [N-1:0] r
 );
 
@@ -236,7 +249,7 @@ module otfc2 #(parameter N=64) (
   logic [N+1:0] QR, QMR;
 
   flopr #(N+3) Qreg(clk, Start, QNext, Q);
-  mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux);
+  mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux);
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   always_comb begin
@@ -248,7 +261,7 @@ module otfc2 #(parameter N=64) (
     end else if (qz) begin
       QNext  = {QR,  1'b0};
       QMNext = {QMR, 1'b1};
-    end else begin        // If qp and qz are not true, then qm is
+    end else begin        // If qp and qz are not true, then qn is
       QNext  = {QMR, 1'b1};
       QMNext = {QMR, 1'b0};
     end 
@@ -266,7 +279,7 @@ module sotfc2(
   input  logic         sp, sn,
   input  logic [`DIVLEN+3:0] C,
   output logic [`DIVLEN-1:0] Sq,
-  output logic [`DIVLEN+3:0] F,
+  output logic [`DIVLEN+3:0] F
 );
 
 
@@ -275,7 +288,7 @@ module sotfc2(
   logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM);
-  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux);
+  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
   flop #(`DIVLEN+4) SMreg(clk, SMux, S);
 
   always_comb begin
@@ -305,17 +318,18 @@ module creg(input  logic clk,
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule
 
 /////////////
 // counter //
 /////////////
-module counter(input  logic clk, 
-               input  logic req, 
-               input  logic [$clog2(`XLEN+1)-1:0] dur,
-               output logic done);
+module srtcounter(input  logic clk, 
+                  input  logic req, 
+                  input  logic [$clog2(`XLEN+1)-1:0] dur,
+                  output logic done
+);
  
   logic    [$clog2(`XLEN+1)-1:0]  count;
 
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 537fbb3e..b83e6b00 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -101,7 +101,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("testvectors", Tests);
+      $readmemh ("sqrttestvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;

From d57fb6f98a364e1323371cc8a05776c034ebf63b Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 19:46:48 +0000
Subject: [PATCH 17/36] radix 4 files removed from srt and divlen modified for
 sqrt

---
 pipelined/config/shared/wally-shared.vh |    2 +-
 pipelined/srt/lint-srt                  |    1 -
 pipelined/srt/qslc_r4a2.c               |  198 -----
 pipelined/srt/qslc_r4a2b                |  Bin 16064 -> 0 bytes
 pipelined/srt/qslc_r4a2b.c              |  190 -----
 pipelined/srt/qslc_r4a2b.tv             | 1024 ----------------------
 pipelined/srt/qslc_sqrt_r4a2            |  Bin 16152 -> 0 bytes
 pipelined/srt/qslc_sqrt_r4a2.c          |  198 -----
 pipelined/srt/qslc_sqrt_r4a2.sv         | 1026 -----------------------
 pipelined/srt/srt.sv                    |    4 +-
 10 files changed, 3 insertions(+), 2640 deletions(-)
 delete mode 100644 pipelined/srt/qslc_r4a2.c
 delete mode 100755 pipelined/srt/qslc_r4a2b
 delete mode 100644 pipelined/srt/qslc_r4a2b.c
 delete mode 100644 pipelined/srt/qslc_r4a2b.tv
 delete mode 100755 pipelined/srt/qslc_sqrt_r4a2
 delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.c
 delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.sv

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 54fa7a9b..ad52be2e 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -103,7 +103,7 @@
 // division constants
 `define RADIX 32'h4
 `define DIVCOPIES 32'h4
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1))
 `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
diff --git a/pipelined/srt/lint-srt b/pipelined/srt/lint-srt
index fd42df88..399201be 100755
--- a/pipelined/srt/lint-srt
+++ b/pipelined/srt/lint-srt
@@ -1,2 +1 @@
 verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
-verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c
deleted file mode 100644
index 8e68f998..00000000
--- a/pipelined/srt/qslc_r4a2.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  printf("\tcase({D[5:3],Wmsbs})\n");
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      printf("\t\t10'b");
-      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
-      printf("_");
-      disp_binary((double) pla.tot, TOT_SIZE, 0);
-      printf(": q = 4'b");
-
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 12)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -4)
-	  printf("0000");
-	else if ((pla.tot) >= -13)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 1:
-	if ((pla.tot) >= 14)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -15)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 2:
-	if ((pla.tot) >= 15)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -16)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 3:
-	if ((pla.tot) >= 16)
-	  printf("1000");
-	else if ((pla.tot) >= 4)
-	  printf("0100");
-	else if ((pla.tot) >= -6)
-	  printf("0000");
-	else if ((pla.tot) >= -18)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 4:
-	if ((pla.tot) >= 18)
-	  printf("1000");
-	else if ((pla.tot) >= 6)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -20)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 5:
-	if ((pla.tot) >= 20)
-	  printf("1000");
-	else if ((pla.tot) >= 6)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -20)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 6:
-	if ((pla.tot) >= 20)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -22)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 7:
-	if ((pla.tot) >= 24)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -24)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      default: printf ("XXX");
-			
-      }
-			
-      printf(";\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  printf("\tendcase\n");
-  
-}
diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b
deleted file mode 100755
index f719bbf471bfc1094ffe5bdd0d6441d7c2426e9d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16064
zcmeHOeQ;FO6~CLXB*BmkpF;d#p-TBMO9COq4>SZ87AFYApwem|o9rgpHrb848wm~{
zv8k05VXVy9I8*$n(-B*(`GfIeoJx=?GPM@Qahlp0jSkFg@B;`~SB<>>&b{}%ygU<T
z>a;WcV^8M2bI&=ybMJZQ-uH6vzWa9gt5#;YT!JZE+$czDFI1WeHvE7KLR5$)Vm!_>
z#Fb(!)MIc|XtR^|W+?;nT#d^HC%<V_7=d4|)PgxjNSypeqb>>v#9Zj>AU`)5wCv>T
zbzscdZnftnXOLH(EnY(a5Ocd9h4MtPG>a#e4(Ba8FKwE$ABp7Gt^K;SA9KNY%{iZx
z6FQdYd^&jvY(&hRevR7C$yX>h!JOJ4y7go-^j+Fpr~Lw6S&eY=8<biw=l0gaPj{_G
zT%2(?SL^mh>xWG`kIZ#<iUm!P+VZjmO`+nZNVK)BxNS*!ae0|95%VpS4O3iR95kj@
z-LiqYrH6DnjUuaE$y^`VCTlz4|NhP5ch>H`ajpNc<K=TVRK7doZ#h3948=`8gdw8(
zxXVeVat;o{sC?XqlF|`B9)7!-_-OP~7$%Nj+&GG!+VPHnUpWf?ic#>_jH168`dK)>
zMaJryHAW~L4>v>-$#A@8&GM#LG+Yy`Z3+v+XlRZ_jYKjSPa1}h&=G+xvCRP2wFQm(
zNHo|KxtDy=vvu0HIT(qGmeyolV=ykDZ>A0|r&66Nt*TmGx!hRjTX?PD;-WIExX`zl
z;?6=Zxp4Qn@W*{c<pgZhpI#iDzj_4SHAXyuG7%xSf%7;$Ann=WVXbQ|isVtv`v?r;
z=Lg@nn<gmze*Ju4y;tjVE0mq}Ecf1|^fY!jR?_s@^gM=1pR(!Y^OKaIGd8^%hf1He
z>1muYy{Yv)c3FSSrl;|rp-cuc8OUTHlYvYIG8xEZAd`XrZ3fPHuKX&{@kwrAm-`<J
zgb3W<lgu)Y1v>tmdssGXE`Av50rTp;V4fL*@Qq{{=>O0(&4;8+i-dvx<17QcPoh@{
ziHCfbL*8JQOFv!RdHlzL&W{2eC(f*^@t5|L9u4eXO7l~5$|4}o*ZVv(elFvIej_x2
z-R^2)*QS$G5c*UqBlJA8&oiT)fQK~)ey+qXo<(f#g+S+-z@g7>3LHvj2V8Fij$cSl
zMg=E*s)Agzuii5QakD>NU;9!ri`MxY0v$_Vqhg@*WHLXnYv~&hQhB)UR0yXx+<$`X
zx(mL;{QEb<NEK~>ACg@%;F;i0b#6P++1fYYDfFk#dA5&#sz8W(Pt75+_3rZbebHO*
zIo&hhnb(8x9+Ad5Z`KDq(Vo;x7fchS?L8zOtM@F}(0k9Em#7ow`%iTG&wpHSumnP?
z9k>qvdDkxg`Ro0C+fVgB3oX?O7k}!WYjgwt6KJ7qSPQqo6OpY+4S4R55vA<aZ$gE`
z!uw)8)yv^c=kNlZ{xn@*TD6z2qB`}BX?{HYU@1lTKFTr!1D-nxi?rAR?mBwdmbF69
z3+`I{D$>!H-s#?mg6BndHCX-CIfM0`?o}ur@eiU_(SJKuDo6ZjS)PhAJm*Ny9M20q
z9qAm;1J7cu(b*H&J^6kDVKj{yFT}1fS%I#=?kVYfs9eWx#9(^SeHts*Lw#fG52f>N
zxMaKM;1b|dvk`ZH9CZNjwhfRCKp6qt1X!;C&)EQjTRkcZCk6p1QGf?+fO7!QFAS-}
z1n>Z$GIfAN`h^EA-f6>+=aBXi-~%k(0Jz=;utJIx;8g`EvH`y0kk%7mp8|Y2f9R^u
za!4fv*b0COX(oxy3V+mwFW`^_0jd;WzYSo8^ugBv%vXR1Yyc~yR|zmq0qSjlGaS-B
z0`%iSp+ee2;(HL0gmN3+!yz>j;AI7vVgpzqRS}>|0nX1GN`e*Ad;)A%fOl+w&p4!U
z1h@?V71GyVL0qNqPucJjIHdOoFk1nBW&^kZkYnj(0=N|*Xal6V<GKiN0{1_zy21wd
zghSd)fWrW&kQzxmpzwt@{3H%(6#@1t!0EX|Nw7kiO@O!pyln$mA-M^#UICu70j!Wt
zd<j4a04k(+Nt~(h+iZ9%qyq%NuO@Q*+W<M7rM(3B0Cx_Je;dFGDNcY_6`;rlIK^37
zPXPM8MTJyJ;vR)RHD@RZmu}Bj4}qBgeB6TO-m+P9V3oa=P1b5y|7f$Ox9W%B$uz8G
zus&q7reKx#{t>cHRbjkB<#7k^Kp?5-Q9d}1O20!1-ql0i9eq1GdR#cWdXA;(G1s?a
z_vF`srG(4_j-Hcw$MB@Q0z!vB?dsT;-r?EPhn~XoF_)BmIkJiz6gppn^4$03<2LWu
zPIrom{y{hm=6Loz4yTu>?xTlsqKCAcLs+v0GG!=}flLN68OUTHlYvYI{$DdN^u|g>
zSa8lXpTtpi-ZYQk_~GZK`6-U}f0^c_FR+ePw8L>d6|T#&ucvpm^e(9Pv}v}0=bkam
zF7RpK-QY{WX&y84tZBXpe)|9x&frA!Zj{J%Z?$l>6}m3V&&hoV(>>B(j)Ur)O@I|S
z@|D?DDAGJ@6ZEv$6$OPW3#Q%dxny&0ySS<7x_OJPnnh~zzY|9zaMF>KAkrsskiQq4
ziQ0M)$9lwCj`oGWpzs%2%g2w+E_NX#8OyU!H{cq6i>2j|{ZTjBGnC0dCIguaWHOM+
zKqdp33}iBp$v`FpnGAf_4Ddcb-q%M<5+d4LNW?PjB_zsM%-UFp*IJTROjCTimU%zn
zWm@KyOOcj&Um>lBh}i!51v5r+H(tsR(GrNL3$Lb#crReDw$rcGM9(NDF40_vBc^DX
znRXN1dS4)3dq|RNJd|kNM#OsyFIA#w)D5$KzYdJ!?$NkB&AI(6wahrYyph+3yh;_`
zWlH%DT_2XeQs%g@Oq6n^w&PcSDf4yc>i=uZ+6V5au|zL!E9{G02j*g8ZiTY0TE6@`
z@0<;_t<hwwx3tu^&{tAidTpzeOXq6MMJl54I7XZ`oe;`9?U8*@C&sG*lfXy9Q@><k
zcWk%gddQFTeGdI1t>^m$qNLS0yF`&FtWYxFFSNVbC8i1<=gKO?RurgyzVBGSQ{tys
z`*2zRGzufdKiYL2ko8O!>t=EdLwuxqUX%Dsg|pxPBHbog`{%iq_a%Osb^oybJPH$#
zhshO6qi<J{hB#5FhMn_~sFCz@q4y3$Dxps5@pP#eZuCQ6D5hKaXKNMoBh~*S(q~N&
z{Jdg(P}k$EKMK8<K{mb(`UzRn1&?bs{gU+5vUcJ;JPLgw@-wZ1k%~7!KM|I8J>KMR
zP%u5F@r4fj?=}8$eR1+PBH*9b`iS12Eq_~r{uSscPG>zwp!dS=qz|E=<y1l*gnnWc
zp8k&eLfl#xPnP;(*!Lc+jTp&bgMcEDY^|^N)rq0~^+vMUsG}YB2_Qo;qoFBQ8*DN{
z$yhvL1Y6rgU97pKDVz+4d?h0zXp4Qs2*%^VEk-z+jBgS3@nCb<2(>mhZ-I+VGJs4D
zgWjEJsx#tc!G*Pm$}m<|FI(d`{I{&2-Tf;z-m+}X>g7=5qX@$v(AL0;YGJIZT3flS
z%2>N{<*oi2qh?uUmERC8O+nt&U%|WfX{-LXw*FgI?CHPg_XgomFo`;2n=KD~5FjrY
zm6A<;PXJ-~hY3a~5i=Tt(GY#KuzD>}p-9wdO@u=NIl{@PO(e9h{1zb;NwgTX_*Mbw
zGYow6z)7I5Ax8YfVfg0_L-En|QksSECAKstgSFtvxZ;gmibU~gMT_u7W67|uA=>I|
ziN{*P@#Geps<t(PuV5k}ty)&Ox|l|hgf|8gjlvh&5=9e=C*#U;PdJ{4#G=Cl1Gad$
zDM${w)Y6m`J{hJDMPEY<A{7$hI^j!(+aS`g0y-wUz!z@R1F12D+E`L`s1c<+xQwQP
z%@I_l+>vhK!%%L<2p!=r`p;D~f5NSY{~3y$-?Q=jNt-2>%f!EjgXS#kKiYR&geBs6
zijssVN5Ssz{C<mmC$|$n?^s9wAMO79|Cza$ftox0Z-*YQ24WmP&kLDz`#djn#vg<~
z%?a6`=Z(yJU?ZX!DbMWB^S20eG^JpFo`*7j1~xlU9d^1K0?p~zpXYJR`EkneGmiOo
z_|qJbWu7N8U#A=69CQ2}!)_EPr|i%3R^~k#$nCQ~U;nSQf2GFpJeYZj&7XXn{!c<j
zIpO-P{~!9od3~U(k+E`__-}C7{k__cIk(R~+H5uRw8MX$1~6y*X#U*}|4rI~xtAR^
z=lYnx?C@{b2IhyT<7~u!tU8E-{rY*{!kpJ%jCbBYuWNtqf8o_g%>!Cw=4}73Fi`vK
zAMh$9^Ba_Cot^Ff6BfJwPOpM8f6MYz1s*@FK8XUhj>>zGbiA(Q?=78pmYJW0kH_ZE
z^YQ(~RNK&aRnm~m{@=hrW0%)kJWuBDXE`z4jqJyH`~!UH`YnIG?(ALwC1{;u)|vg8
zkA<_{UsPyliQ?Q2SHONO(>$AW+&<3(3$?!!@9e(`x_u4jY6bhxwlu0hu@c!ij~!)E
z=Owzr{W`EKpr$D;$HUK`Jm@%f<*Cknv>*dzIV$INm6hh=Bl~|OzpNCZ$l>4+#lHca
C!tRp*

diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c
deleted file mode 100644
index 94a3a4cd..00000000
--- a/pipelined/srt/qslc_r4a2b.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 12)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -4)
-	  printf("0");
-	else if ((pla.tot) >= -13)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 1:
-	if ((pla.tot) >= 14)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -15)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 2:
-	if ((pla.tot) >= 15)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -16)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 3:
-	if ((pla.tot) >= 16)
-	  printf("8");
-	else if ((pla.tot) >= 4)
-	  printf("4");
-	else if ((pla.tot) >= -6)
-	  printf("0");
-	else if ((pla.tot) >= -18)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 4:
-	if ((pla.tot) >= 18)
-	  printf("8");
-	else if ((pla.tot) >= 6)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -20)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 5:
-	if ((pla.tot) >= 20)
-	  printf("8");
-	else if ((pla.tot) >= 6)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -20)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 6:
-	if ((pla.tot) >= 20)
-	  printf("8");
-	else if ((pla.tot) >= 8)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -22)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      case 7:
-	if ((pla.tot) >= 24)
-	  printf("8");
-	else if ((pla.tot) >= 8)
-	  printf("4");
-	else if ((pla.tot) >= -8)
-	  printf("0");
-	else if ((pla.tot) >= -24)
-	  printf("2");
-	else
-	  printf("1");
-	break;
-      default: printf ("X");
-			
-      }
-			
-      printf("\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  
-}
diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv
deleted file mode 100644
index b92d81e8..00000000
--- a/pipelined/srt/qslc_r4a2b.tv
+++ /dev/null
@@ -1,1024 +0,0 @@
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-0
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-4
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-8
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-2
-0
-0
-0
-0
-0
-0
-0
-0
diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2
deleted file mode 100755
index 5cff70cdf9d63dd415b92ba2ce9092b7da87695f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16152
zcmeHOdvsLA8J`WrB!X;$pnwYN1Lc(^;Z;B$0kUD^0i_5BeBNxbn`DnkcHQ0ZQbIJM
z<iuENTaC4+r5>MEPpw*|r#-UxK<ueCSZdSO)~MjwXq6C9BO=@1H#6U6a+}zmwtuxd
z=iK|veBbZ)&CGY_-aE59cbjKLNruBAI1Let1eM~sN>ii_>%XBh5Jh6HI0NSiV!SvF
z<)Koi>_tkzs!~Trf0?cILm-JahB5=lbCp`K<Q9@7-e4&^OIcV7-4Ehrl1Z2C^4-%F
zk0skv^|_Q?*yMu*Kw@d~C^kFurCID;*k3m4xa`o9c~m6cF3sDec`OCvwd8nGOz4=a
z<7t-#uu)=Z=T&Q-T^1{vU`cgQy7F+$|L@edO7q%W(v7go1xhVga(&+fkK*#LCcez8
zb$x?fhp>(#OMN-T^qOG#oLSRr{P{J(y82D|o952RpEJuH4Y_B?CQ!RvIA}~QzkDrS
zmN@Bj7)3^-DziVbjna0)U$iKH%R&F^>xSLk_;$f#_cYy_^Vu}QP`imk7)s=iFFBPd
zpM!%iDjv5Wr*tF_1b$mfa<CMJVdMbDs|L|iJ+1-pHw=O=8AQKn5dC@3yKvakG$>ql
zCCZoKxL^Riuu9jgwETfcpeh)R1tM!!E~^RE1=je=YXZWus%k@ZRy5{|#4Jle2Ly&g
z8v(A^<g+S+b-tS57UE(gRA_FkFIXqS^|2_ODyn@E5sn1wVwD1zS{fX4C|99!%U6^x
zU1rU2&zL7TJ9AblJHtJjx{-k^?7(>_4u{OC&QX3FvNRrCIGVqG4=(pm@g3wT5vLiF
z$N96;K1BRb>*~h^F-$VVV+2Ox=MUfayYiI2O+Q~)U#9ihMas^4E_W<edK!D?xa>6a
zd=C&ZVd(XvSvu%8^jrrSdJR2|b56Upo_si12^lx^bpLSLZ|LQ7UmMyDy}Eyxk(TKQ
zq$7}yKso~H2&5yBj==v%1dch!pYS&QE!*3Y`N!!(c(=!68Oe5U)63Z}$p9p0-wJh4
za$*yNbAljzHCcMP-c2TxJ7t+BWIbI6Qe~PL_H^w{m1#oQ)Achh!`oKrtyoBZDCgVC
z6K&=5Y~|5rx$xuC<^$Jzo8R*`9qL}S##0zCe9haskmje!(Z}3E9ItdcC)|ccpo6Uo
z$Aar^%^XQ$UT<tPy8aeG)b*TXr*lFh;a<`b_%1TG&VG-?<Wt_}Ztvbt7JK*h4)Hqn
zc@Lb5je>)Rw1ezqXQguj`o(;TPvb%|i~6Z+y-f>mrL4F4aBR4@W#R28B)U!|lL<e9
zvoCWi%8u*6?Z@xh2qQUK3mz_3L60-flW5+0sJXtg$C>L%9CJ3D@yrMzDxGWglC7h~
z)A@NvrSnL<$2ldA<~=NpllE77optfVqo@$MhBy`5E1lEVc5Ilmn=ay1&!J||@sCFA
zDL^4n23(WpxTD2${8CS6!-ri@Kucah@gz1((-n9Qp@!mqHI#sfmaR<mIM>S-B~15a
zaM-VTS^da4+&piyr<eNItLhuBTJ`I%lF5(8?kS`ezJR=pK#%iU!Xhk2gWFzv$%tC6
z^SR6m@T*EwXYY>8vB)@|&m0L+IWg;GWpid0vKSR7htt)0H4%g7S3M_DiRfy<qT^Lh
zugqVDJYXLw*lyA#s)2>`SFmPn>dbOJhxi<M4UwmCH|&3bFnj10P|=@>?(MONCy4ia
zW-EAmJ5Q_J+neKjfw*si+fLke#iccm;(iVWYLA(<e|S>XzC2ZXQ*V~@j>TBqG{?QI
zqqc)gEslcIGBm^6=4~BaMSMs5B8>LuGe>}^t6y+-!&pEkkh%F))wve9@5+QXK;NI*
z2hEbuFNPV=GJ^gIt1Uox8_?dH$?NY-=xl=iR6$z}=m!$|qzOHpptk`EANQGtqhG>s
zgYr>o_&7(l>}nIbkDx^gI^KY)PRuf)_Y-uqf*zaF*Wb@%%SM>cFhM`WsuTU)Z$J-8
zXySAuG#-NP0Ti7$Xd0%G;bG<DHp9mR*@^F&PzOP8QqZdmsOm(>guZnG&{YcRHlW94
z%Suh?;{=_kphFC3r-WW;LK_Ks0wWonaGHiHGQ0}|0<mXuU(8e|+OdvL4W|VJeNI8Y
zYd}>eequuN2->2cK?ABfahnPK;0r*@6m*dR{j2Q6^(OR}1f2~iI&qn4cz_J&Djz2%
z^>yMxc_oIK(2WE=iq{MJ`-TA>CZUJ%dq%3iD+szzK_4`rsuO!m==lV_UqLq;(6?nL
z?lYmE^#V#iS)vo$O~YTvuul1yZTPrEwyea2K26YR3VON$Rh@8|&^rj~P|$ZS=?hJ_
zY}pBVB=Y0pT7tfXcL_rCtO4zi(1Rwlkf4tPicZ8$!x%E$t$c(HAF30#n9z?u2ee8-
zJqFY%`+JQEeUYFG6m*;cRh^h^LhmJLo`N2^xUatlWha~_w1%J`s7LIVcuJ-MaU~gE
zhXH|j-0(3~w(M6XbP_=yP|!vL>TEa*p`qXXQeMCM3@^VeaJk;Vb>B?5sEPYM;Z_4z
zVBp@AxK$?ZNy1G5o8A|xTJ9z7C}`cL_7-TN8?SVU)hMEZHzuay)6}`GDel185pVCs
zYY@5CQLmt16oW^AqgQQCJ6^WmN1@5n>uB2AyUlsm6S&@Ze{LsbXO?u4g-r8{P==+l
zH>Z6^W<Ziof^;&=dDjgfp*44B%62DS{xq4CFRa&Il5<Kqr%labsSW9rjzBsB=?J7F
zU`BxcM>11Rg)bVIbW`y)v**veZrZxqXnAz&WRa6oSa?afnD4sLwa_&Sxx#{i0#Sgo
zK!#3*GFK?puU{`N6*+-AKRFWON5_&$|M6sUGM>+$;CKdajZJvpEyUwH0}~#dwwY}}
zzQ}RTkW1-5V7zR^jw8t={cqU;*#?<_d<t?>cQUyiGVf?IxgT<Q4;Ippl<2=!N{%h7
zg=15$<DB7H**oz@C;hoN_QS`;1Srmuf1zE3>?|C)(61u>h}@D9V=i+J+nC)b7LS`h
zW#&Z}kec|{;+PAZB$5(|^f4R-&=)~+qPp(Hu>@_MgZhPMMDFbw%g#7$NWKG2l5Kec
z>UHSDaZD`w>>p*3J)P1KNJk(Yfpi4Y5lBZM9f5QN(h*2U;D0v)yf2UU;n9+T5-*Ur
z+^p0Bf7Pg@7b9or;=yq$Ggg;*|KB;f%qt5G6TA<Q)+?0Q{^_Y?h{{jlhjB{yxlN}1
z_{V?}t<NZRXgmHEkm=`I;{Otv#%MWOOBR|YcIkb2yc#Xj09sd3qNNd~vy~8!>WW#P
z&<*2u$2AT=+snlDkJn|!;Xf7GKg3n4@aLQ?w`qS^pUN_~3(GiJZqRltVP%>7q22%2
zL~7r+&0~SS-bGrj(y~lT8~mRZvx^jD`Lbp6U6a<9*Vo1BU4@128SaAo!g=+wTsT>4
zzQ!Ak&!OUI(hi~P;DF^$9VnLuoC!G)o~}~{cDHuh`a?X@yYxO)6lI!;GwJ!h(P~kO
zj6;kQxkXCD_Ydu^c8If5<6c>XxEUGp!}lTUcS!u`)IM6)KZ49a?H{aPyQQB|V%PcX
zp-&FvXP?BME$rjq4M{sQwSS(yyeIKvQuh<<dy&aQ97Yu>jlO-wNfZa#51xxa4W!S9
z-lbghJGV$Zo;F4O)S`3{KUYJaE5@d-3$uKK;KQWP$P@frWBi6e@Xgx4y*>9p?_#nc
zAA&wFW31r&fK5-6o+6=19JjsD=OUhCiWsTnx1b*hOI(jz`5O~V`!t@%A+z4p_;+-$
z<!?YR{z2;>)8kzJb_D(V&{I3@evUxzVyA}8!Gr4pqs-ZhpdXomw}<Wi5?5A4VukJy
z_PzVcgI3H}C7_7L>MJYV6{2r{y%npqDrkp&6i9!_s;UW<`)VwIEEI`azWPm~B2*i$
z3B&?^cfr62+F~EHe36K6vlXa|MK+7dh_5za`Ri+IH-lo7EFfe3pf^TquqXdUZ0C>6
z^36cYEURSol9e9Ib9piC@GrjV@+B)vmqCpWDJ+jyTfN1rg|&Re6-$?_u&yX6xze-7
zTC-&73XdhiH9p?cU&OohX^Z|pZ2wPLv6uhrzB!<;1X5V(Dd6|TP)TTGpL(8<QP5uz
z^yy{A1Xtf3pcVZ;V6gnrkX7xg^V7!-rB?vu57t@r(STndmN;4E(WvIiZx;N)XxJ*p
zHw_50W#Qup4itSgG2o{V{Xc`~-!!@$Lb#)wYh%80$XG<lYR=)Ch(IJP+;yQ?z+F{W
z?+!;o;Xov|*-(|&2k~`H(63cXmX_wzXp``2U$k1d{hRAhg_5y|vTO)MqQOvIzk&r@
zBv9icg3g6&V!|z(=|<LF6+)3R(LjZ8#{!#Bq~Qm2NM03ppjr>OYCpVjN%>GCPcb-;
zs(iIUI8$_lTevagYcZ1f7W<DR&C_s8<6WeZ{X03Hr)jgqa-QUcIB0If{K38lBP=DJ
z52=z6bC5Cl_V2>z_i{7wbCGrQ-ZlCB-e>7zpq6(2I_U9NL5SPW^G}vsKhHny?e~FC
zb5G{;{FG(4u9w<Kab`Zx6NAvvRE7CG-(|`3TR>zYA7;7{1)4iDpXZA#d0xxyXB^80
z@M$i}WuA|+T%{}H7<2o%4XwyfOqtK~W0r9Z<ocP<{l8c9muei(w^^1MeB#*oKZcHC
z!v0h58U4QDb%=H&+sb*8Kfqz~U7E*|`_CM0PF3@WjUUzkmW&^a|CEj2s0l2Kn5ZTD
zWBCgkKducd&*X)Yjn8iP*!Vo}Vae+}#@p|oS2Ulmzi?@!mTkJog4@ky+POmUXMUSY
zSy(Pm#njne|DRzo`5i6=W%-*FQ)PJku=+4E*cB@8LDKR1mA~V(<GIZ8C^$}o&+FT^
zeDc;eUR9~D%>1ulps~yAI-Y;?cexxGy8kFKkK_0_bkzS8U$1X>O((PvW%As2Vm`~$
zKsNcJNMj3><a*cx^SC@5WYTf{JYUS!d^_HL{b=_S)vw{~RxtnKltyI;OUaZxc9cb(
z=W2)i9H#F-Y3j@E;pY#n7rE_<sm{5xl$>I7&h;{urp*KK=hKS~X`GE<D~i7ZTqgdH

diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c
deleted file mode 100644
index 252293cc..00000000
--- a/pipelined/srt/qslc_sqrt_r4a2.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
-  Program:      qslc_r4a2.c
-  Description:  Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
-  User:         James E. Stine
-
-*/
-
-#include <stdio.h>
-#include <math.h>
-
-#define DIVISOR_SIZE 3
-#define CARRY_SIZE 7
-#define SUM_SIZE 7
-#define TOT_SIZE 7
-
-void disp_binary(double, int, int);
-
-struct bits {
-  unsigned int divisor : DIVISOR_SIZE;
-  int tot : TOT_SIZE;
-} pla;
-
-/* 
-
-   Function:      disp_binary
-   Description:   This function displays a Double-Precision number into
-   four 16 bit integers using the global union variable 
-   dp_number
-   Argument List: double x            The value to be converted
-   int bits_to_left    Number of bits left of radix point
-   int bits_to_right   Number of bits right of radix point
-   Return value:  none
-
-*/
-void disp_binary(double x, int bits_to_left, int bits_to_right) {
-  int i; 
-  double diff;
-
-  if (fabs(x) <  pow(2.0, ((double) -bits_to_right)) ) {
-    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-      printf("0");
-    }
-    if (i == bits_to_right+1) 
-      ;
-    
-    return;
-  }
-
-  if (x < 0.0) 
-    x = pow(2.0, ((double) bits_to_left)) + x;
-
-  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
-    diff = pow(2.0, ((double) -i) );
-    if (x < diff) 
-      printf("0");
-    else {
-      printf("1");
-      x -= diff;
-    }
-    if (i == 0) 
-      ;
-    
-  }
-
-}
-
-int main() {
-  int m;
-  int n;
-  int o;
-  pla.divisor = 0;
-  pla.tot = 0;
-  printf("\tcase({D[5:3],Wmsbs})\n");
-  for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
-    for (m=0; m < pow(2.0, TOT_SIZE); m++) {
-      printf("\t\t11'b");
-      disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
-      printf("_");
-      disp_binary((double) pla.tot, TOT_SIZE, 0);
-      printf(": q = 4'b");
-
-      /*
-	4 bits for Radix 4 (a=2)
-	1000 = +2
-	0100 = +1
-	0000 =  0
-	0010 = -1
-	0001 = -2		
-      */
-      switch (pla.divisor) {
-      case 0:
-	if ((pla.tot) >= 24)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -8)
-	  printf("0000");
-	else if ((pla.tot) >= -26)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 1:
-	if ((pla.tot) >= 28)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -10)
-	  printf("0000");
-	else if ((pla.tot) >= -28)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 2:
-	if ((pla.tot) >= 32)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -32)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 3:
-	if ((pla.tot) >= 32)
-	  printf("1000");
-	else if ((pla.tot) >= 8)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -34)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 4:
-	if ((pla.tot) >= 36)
-	  printf("1000");
-	else if ((pla.tot) >= 12)
-	  printf("0100");
-	else if ((pla.tot) >= -12)
-	  printf("0000");
-	else if ((pla.tot) >= -36)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 5:
-	if ((pla.tot) >= 40)
-	  printf("1000");
-	else if ((pla.tot) >= 12)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -40)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 6:
-	if ((pla.tot) >= 40)
-	  printf("1000");
-	else if ((pla.tot) >= 16)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -44)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      case 7:
-	if ((pla.tot) >= 44)
-	  printf("1000");
-	else if ((pla.tot) >= 16)
-	  printf("0100");
-	else if ((pla.tot) >= -16)
-	  printf("0000");
-	else if ((pla.tot) >= -46)
-	  printf("0010");
-	else
-	  printf("0001");
-	break;
-      default: printf ("XXX");
-			
-      }
-			
-      printf(";\n");
-      (pla.tot)++;
-    }
-    (pla.divisor)++;
-  }
-  printf("\tendcase\n");
-  
-}
diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv
deleted file mode 100644
index 805dbbae..00000000
--- a/pipelined/srt/qslc_sqrt_r4a2.sv
+++ /dev/null
@@ -1,1026 +0,0 @@
-	case({D[5:3],Wmsbs})
-		11'b000_0000000: q = 4'b0000;
-		11'b000_0000001: q = 4'b0000;
-		11'b000_0000010: q = 4'b0000;
-		11'b000_0000011: q = 4'b0000;
-		11'b000_0000100: q = 4'b0000;
-		11'b000_0000101: q = 4'b0000;
-		11'b000_0000110: q = 4'b0000;
-		11'b000_0000111: q = 4'b0000;
-		11'b000_0001000: q = 4'b0100;
-		11'b000_0001001: q = 4'b0100;
-		11'b000_0001010: q = 4'b0100;
-		11'b000_0001011: q = 4'b0100;
-		11'b000_0001100: q = 4'b0100;
-		11'b000_0001101: q = 4'b0100;
-		11'b000_0001110: q = 4'b0100;
-		11'b000_0001111: q = 4'b0100;
-		11'b000_0010000: q = 4'b0100;
-		11'b000_0010001: q = 4'b0100;
-		11'b000_0010010: q = 4'b0100;
-		11'b000_0010011: q = 4'b0100;
-		11'b000_0010100: q = 4'b0100;
-		11'b000_0010101: q = 4'b0100;
-		11'b000_0010110: q = 4'b0100;
-		11'b000_0010111: q = 4'b0100;
-		11'b000_0011000: q = 4'b1000;
-		11'b000_0011001: q = 4'b1000;
-		11'b000_0011010: q = 4'b1000;
-		11'b000_0011011: q = 4'b1000;
-		11'b000_0011100: q = 4'b1000;
-		11'b000_0011101: q = 4'b1000;
-		11'b000_0011110: q = 4'b1000;
-		11'b000_0011111: q = 4'b1000;
-		11'b000_0100000: q = 4'b1000;
-		11'b000_0100001: q = 4'b1000;
-		11'b000_0100010: q = 4'b1000;
-		11'b000_0100011: q = 4'b1000;
-		11'b000_0100100: q = 4'b1000;
-		11'b000_0100101: q = 4'b1000;
-		11'b000_0100110: q = 4'b1000;
-		11'b000_0100111: q = 4'b1000;
-		11'b000_0101000: q = 4'b1000;
-		11'b000_0101001: q = 4'b1000;
-		11'b000_0101010: q = 4'b1000;
-		11'b000_0101011: q = 4'b1000;
-		11'b000_0101100: q = 4'b1000;
-		11'b000_0101101: q = 4'b1000;
-		11'b000_0101110: q = 4'b1000;
-		11'b000_0101111: q = 4'b1000;
-		11'b000_0110000: q = 4'b1000;
-		11'b000_0110001: q = 4'b1000;
-		11'b000_0110010: q = 4'b1000;
-		11'b000_0110011: q = 4'b1000;
-		11'b000_0110100: q = 4'b1000;
-		11'b000_0110101: q = 4'b1000;
-		11'b000_0110110: q = 4'b1000;
-		11'b000_0110111: q = 4'b1000;
-		11'b000_0111000: q = 4'b1000;
-		11'b000_0111001: q = 4'b1000;
-		11'b000_0111010: q = 4'b1000;
-		11'b000_0111011: q = 4'b1000;
-		11'b000_0111100: q = 4'b1000;
-		11'b000_0111101: q = 4'b1000;
-		11'b000_0111110: q = 4'b1000;
-		11'b000_0111111: q = 4'b1000;
-		11'b000_1000000: q = 4'b0001;
-		11'b000_1000001: q = 4'b0001;
-		11'b000_1000010: q = 4'b0001;
-		11'b000_1000011: q = 4'b0001;
-		11'b000_1000100: q = 4'b0001;
-		11'b000_1000101: q = 4'b0001;
-		11'b000_1000110: q = 4'b0001;
-		11'b000_1000111: q = 4'b0001;
-		11'b000_1001000: q = 4'b0001;
-		11'b000_1001001: q = 4'b0001;
-		11'b000_1001010: q = 4'b0001;
-		11'b000_1001011: q = 4'b0001;
-		11'b000_1001100: q = 4'b0001;
-		11'b000_1001101: q = 4'b0001;
-		11'b000_1001110: q = 4'b0001;
-		11'b000_1001111: q = 4'b0001;
-		11'b000_1010000: q = 4'b0001;
-		11'b000_1010001: q = 4'b0001;
-		11'b000_1010010: q = 4'b0001;
-		11'b000_1010011: q = 4'b0001;
-		11'b000_1010100: q = 4'b0001;
-		11'b000_1010101: q = 4'b0001;
-		11'b000_1010110: q = 4'b0001;
-		11'b000_1010111: q = 4'b0001;
-		11'b000_1011000: q = 4'b0001;
-		11'b000_1011001: q = 4'b0001;
-		11'b000_1011010: q = 4'b0001;
-		11'b000_1011011: q = 4'b0001;
-		11'b000_1011100: q = 4'b0001;
-		11'b000_1011101: q = 4'b0001;
-		11'b000_1011110: q = 4'b0001;
-		11'b000_1011111: q = 4'b0001;
-		11'b000_1100000: q = 4'b0001;
-		11'b000_1100001: q = 4'b0001;
-		11'b000_1100010: q = 4'b0001;
-		11'b000_1100011: q = 4'b0001;
-		11'b000_1100100: q = 4'b0001;
-		11'b000_1100101: q = 4'b0001;
-		11'b000_1100110: q = 4'b0010;
-		11'b000_1100111: q = 4'b0010;
-		11'b000_1101000: q = 4'b0010;
-		11'b000_1101001: q = 4'b0010;
-		11'b000_1101010: q = 4'b0010;
-		11'b000_1101011: q = 4'b0010;
-		11'b000_1101100: q = 4'b0010;
-		11'b000_1101101: q = 4'b0010;
-		11'b000_1101110: q = 4'b0010;
-		11'b000_1101111: q = 4'b0010;
-		11'b000_1110000: q = 4'b0010;
-		11'b000_1110001: q = 4'b0010;
-		11'b000_1110010: q = 4'b0010;
-		11'b000_1110011: q = 4'b0010;
-		11'b000_1110100: q = 4'b0010;
-		11'b000_1110101: q = 4'b0010;
-		11'b000_1110110: q = 4'b0010;
-		11'b000_1110111: q = 4'b0010;
-		11'b000_1111000: q = 4'b0000;
-		11'b000_1111001: q = 4'b0000;
-		11'b000_1111010: q = 4'b0000;
-		11'b000_1111011: q = 4'b0000;
-		11'b000_1111100: q = 4'b0000;
-		11'b000_1111101: q = 4'b0000;
-		11'b000_1111110: q = 4'b0000;
-		11'b000_1111111: q = 4'b0000;
-		11'b001_0000000: q = 4'b0000;
-		11'b001_0000001: q = 4'b0000;
-		11'b001_0000010: q = 4'b0000;
-		11'b001_0000011: q = 4'b0000;
-		11'b001_0000100: q = 4'b0000;
-		11'b001_0000101: q = 4'b0000;
-		11'b001_0000110: q = 4'b0000;
-		11'b001_0000111: q = 4'b0000;
-		11'b001_0001000: q = 4'b0100;
-		11'b001_0001001: q = 4'b0100;
-		11'b001_0001010: q = 4'b0100;
-		11'b001_0001011: q = 4'b0100;
-		11'b001_0001100: q = 4'b0100;
-		11'b001_0001101: q = 4'b0100;
-		11'b001_0001110: q = 4'b0100;
-		11'b001_0001111: q = 4'b0100;
-		11'b001_0010000: q = 4'b0100;
-		11'b001_0010001: q = 4'b0100;
-		11'b001_0010010: q = 4'b0100;
-		11'b001_0010011: q = 4'b0100;
-		11'b001_0010100: q = 4'b0100;
-		11'b001_0010101: q = 4'b0100;
-		11'b001_0010110: q = 4'b0100;
-		11'b001_0010111: q = 4'b0100;
-		11'b001_0011000: q = 4'b0100;
-		11'b001_0011001: q = 4'b0100;
-		11'b001_0011010: q = 4'b0100;
-		11'b001_0011011: q = 4'b0100;
-		11'b001_0011100: q = 4'b1000;
-		11'b001_0011101: q = 4'b1000;
-		11'b001_0011110: q = 4'b1000;
-		11'b001_0011111: q = 4'b1000;
-		11'b001_0100000: q = 4'b1000;
-		11'b001_0100001: q = 4'b1000;
-		11'b001_0100010: q = 4'b1000;
-		11'b001_0100011: q = 4'b1000;
-		11'b001_0100100: q = 4'b1000;
-		11'b001_0100101: q = 4'b1000;
-		11'b001_0100110: q = 4'b1000;
-		11'b001_0100111: q = 4'b1000;
-		11'b001_0101000: q = 4'b1000;
-		11'b001_0101001: q = 4'b1000;
-		11'b001_0101010: q = 4'b1000;
-		11'b001_0101011: q = 4'b1000;
-		11'b001_0101100: q = 4'b1000;
-		11'b001_0101101: q = 4'b1000;
-		11'b001_0101110: q = 4'b1000;
-		11'b001_0101111: q = 4'b1000;
-		11'b001_0110000: q = 4'b1000;
-		11'b001_0110001: q = 4'b1000;
-		11'b001_0110010: q = 4'b1000;
-		11'b001_0110011: q = 4'b1000;
-		11'b001_0110100: q = 4'b1000;
-		11'b001_0110101: q = 4'b1000;
-		11'b001_0110110: q = 4'b1000;
-		11'b001_0110111: q = 4'b1000;
-		11'b001_0111000: q = 4'b1000;
-		11'b001_0111001: q = 4'b1000;
-		11'b001_0111010: q = 4'b1000;
-		11'b001_0111011: q = 4'b1000;
-		11'b001_0111100: q = 4'b1000;
-		11'b001_0111101: q = 4'b1000;
-		11'b001_0111110: q = 4'b1000;
-		11'b001_0111111: q = 4'b1000;
-		11'b001_1000000: q = 4'b0001;
-		11'b001_1000001: q = 4'b0001;
-		11'b001_1000010: q = 4'b0001;
-		11'b001_1000011: q = 4'b0001;
-		11'b001_1000100: q = 4'b0001;
-		11'b001_1000101: q = 4'b0001;
-		11'b001_1000110: q = 4'b0001;
-		11'b001_1000111: q = 4'b0001;
-		11'b001_1001000: q = 4'b0001;
-		11'b001_1001001: q = 4'b0001;
-		11'b001_1001010: q = 4'b0001;
-		11'b001_1001011: q = 4'b0001;
-		11'b001_1001100: q = 4'b0001;
-		11'b001_1001101: q = 4'b0001;
-		11'b001_1001110: q = 4'b0001;
-		11'b001_1001111: q = 4'b0001;
-		11'b001_1010000: q = 4'b0001;
-		11'b001_1010001: q = 4'b0001;
-		11'b001_1010010: q = 4'b0001;
-		11'b001_1010011: q = 4'b0001;
-		11'b001_1010100: q = 4'b0001;
-		11'b001_1010101: q = 4'b0001;
-		11'b001_1010110: q = 4'b0001;
-		11'b001_1010111: q = 4'b0001;
-		11'b001_1011000: q = 4'b0001;
-		11'b001_1011001: q = 4'b0001;
-		11'b001_1011010: q = 4'b0001;
-		11'b001_1011011: q = 4'b0001;
-		11'b001_1011100: q = 4'b0001;
-		11'b001_1011101: q = 4'b0001;
-		11'b001_1011110: q = 4'b0001;
-		11'b001_1011111: q = 4'b0001;
-		11'b001_1100000: q = 4'b0001;
-		11'b001_1100001: q = 4'b0001;
-		11'b001_1100010: q = 4'b0001;
-		11'b001_1100011: q = 4'b0001;
-		11'b001_1100100: q = 4'b0010;
-		11'b001_1100101: q = 4'b0010;
-		11'b001_1100110: q = 4'b0010;
-		11'b001_1100111: q = 4'b0010;
-		11'b001_1101000: q = 4'b0010;
-		11'b001_1101001: q = 4'b0010;
-		11'b001_1101010: q = 4'b0010;
-		11'b001_1101011: q = 4'b0010;
-		11'b001_1101100: q = 4'b0010;
-		11'b001_1101101: q = 4'b0010;
-		11'b001_1101110: q = 4'b0010;
-		11'b001_1101111: q = 4'b0010;
-		11'b001_1110000: q = 4'b0010;
-		11'b001_1110001: q = 4'b0010;
-		11'b001_1110010: q = 4'b0010;
-		11'b001_1110011: q = 4'b0010;
-		11'b001_1110100: q = 4'b0010;
-		11'b001_1110101: q = 4'b0010;
-		11'b001_1110110: q = 4'b0000;
-		11'b001_1110111: q = 4'b0000;
-		11'b001_1111000: q = 4'b0000;
-		11'b001_1111001: q = 4'b0000;
-		11'b001_1111010: q = 4'b0000;
-		11'b001_1111011: q = 4'b0000;
-		11'b001_1111100: q = 4'b0000;
-		11'b001_1111101: q = 4'b0000;
-		11'b001_1111110: q = 4'b0000;
-		11'b001_1111111: q = 4'b0000;
-		11'b010_0000000: q = 4'b0000;
-		11'b010_0000001: q = 4'b0000;
-		11'b010_0000010: q = 4'b0000;
-		11'b010_0000011: q = 4'b0000;
-		11'b010_0000100: q = 4'b0000;
-		11'b010_0000101: q = 4'b0000;
-		11'b010_0000110: q = 4'b0000;
-		11'b010_0000111: q = 4'b0000;
-		11'b010_0001000: q = 4'b0100;
-		11'b010_0001001: q = 4'b0100;
-		11'b010_0001010: q = 4'b0100;
-		11'b010_0001011: q = 4'b0100;
-		11'b010_0001100: q = 4'b0100;
-		11'b010_0001101: q = 4'b0100;
-		11'b010_0001110: q = 4'b0100;
-		11'b010_0001111: q = 4'b0100;
-		11'b010_0010000: q = 4'b0100;
-		11'b010_0010001: q = 4'b0100;
-		11'b010_0010010: q = 4'b0100;
-		11'b010_0010011: q = 4'b0100;
-		11'b010_0010100: q = 4'b0100;
-		11'b010_0010101: q = 4'b0100;
-		11'b010_0010110: q = 4'b0100;
-		11'b010_0010111: q = 4'b0100;
-		11'b010_0011000: q = 4'b0100;
-		11'b010_0011001: q = 4'b0100;
-		11'b010_0011010: q = 4'b0100;
-		11'b010_0011011: q = 4'b0100;
-		11'b010_0011100: q = 4'b0100;
-		11'b010_0011101: q = 4'b0100;
-		11'b010_0011110: q = 4'b0100;
-		11'b010_0011111: q = 4'b0100;
-		11'b010_0100000: q = 4'b1000;
-		11'b010_0100001: q = 4'b1000;
-		11'b010_0100010: q = 4'b1000;
-		11'b010_0100011: q = 4'b1000;
-		11'b010_0100100: q = 4'b1000;
-		11'b010_0100101: q = 4'b1000;
-		11'b010_0100110: q = 4'b1000;
-		11'b010_0100111: q = 4'b1000;
-		11'b010_0101000: q = 4'b1000;
-		11'b010_0101001: q = 4'b1000;
-		11'b010_0101010: q = 4'b1000;
-		11'b010_0101011: q = 4'b1000;
-		11'b010_0101100: q = 4'b1000;
-		11'b010_0101101: q = 4'b1000;
-		11'b010_0101110: q = 4'b1000;
-		11'b010_0101111: q = 4'b1000;
-		11'b010_0110000: q = 4'b1000;
-		11'b010_0110001: q = 4'b1000;
-		11'b010_0110010: q = 4'b1000;
-		11'b010_0110011: q = 4'b1000;
-		11'b010_0110100: q = 4'b1000;
-		11'b010_0110101: q = 4'b1000;
-		11'b010_0110110: q = 4'b1000;
-		11'b010_0110111: q = 4'b1000;
-		11'b010_0111000: q = 4'b1000;
-		11'b010_0111001: q = 4'b1000;
-		11'b010_0111010: q = 4'b1000;
-		11'b010_0111011: q = 4'b1000;
-		11'b010_0111100: q = 4'b1000;
-		11'b010_0111101: q = 4'b1000;
-		11'b010_0111110: q = 4'b1000;
-		11'b010_0111111: q = 4'b1000;
-		11'b010_1000000: q = 4'b0001;
-		11'b010_1000001: q = 4'b0001;
-		11'b010_1000010: q = 4'b0001;
-		11'b010_1000011: q = 4'b0001;
-		11'b010_1000100: q = 4'b0001;
-		11'b010_1000101: q = 4'b0001;
-		11'b010_1000110: q = 4'b0001;
-		11'b010_1000111: q = 4'b0001;
-		11'b010_1001000: q = 4'b0001;
-		11'b010_1001001: q = 4'b0001;
-		11'b010_1001010: q = 4'b0001;
-		11'b010_1001011: q = 4'b0001;
-		11'b010_1001100: q = 4'b0001;
-		11'b010_1001101: q = 4'b0001;
-		11'b010_1001110: q = 4'b0001;
-		11'b010_1001111: q = 4'b0001;
-		11'b010_1010000: q = 4'b0001;
-		11'b010_1010001: q = 4'b0001;
-		11'b010_1010010: q = 4'b0001;
-		11'b010_1010011: q = 4'b0001;
-		11'b010_1010100: q = 4'b0001;
-		11'b010_1010101: q = 4'b0001;
-		11'b010_1010110: q = 4'b0001;
-		11'b010_1010111: q = 4'b0001;
-		11'b010_1011000: q = 4'b0001;
-		11'b010_1011001: q = 4'b0001;
-		11'b010_1011010: q = 4'b0001;
-		11'b010_1011011: q = 4'b0001;
-		11'b010_1011100: q = 4'b0001;
-		11'b010_1011101: q = 4'b0001;
-		11'b010_1011110: q = 4'b0001;
-		11'b010_1011111: q = 4'b0001;
-		11'b010_1100000: q = 4'b0010;
-		11'b010_1100001: q = 4'b0010;
-		11'b010_1100010: q = 4'b0010;
-		11'b010_1100011: q = 4'b0010;
-		11'b010_1100100: q = 4'b0010;
-		11'b010_1100101: q = 4'b0010;
-		11'b010_1100110: q = 4'b0010;
-		11'b010_1100111: q = 4'b0010;
-		11'b010_1101000: q = 4'b0010;
-		11'b010_1101001: q = 4'b0010;
-		11'b010_1101010: q = 4'b0010;
-		11'b010_1101011: q = 4'b0010;
-		11'b010_1101100: q = 4'b0010;
-		11'b010_1101101: q = 4'b0010;
-		11'b010_1101110: q = 4'b0010;
-		11'b010_1101111: q = 4'b0010;
-		11'b010_1110000: q = 4'b0010;
-		11'b010_1110001: q = 4'b0010;
-		11'b010_1110010: q = 4'b0010;
-		11'b010_1110011: q = 4'b0010;
-		11'b010_1110100: q = 4'b0000;
-		11'b010_1110101: q = 4'b0000;
-		11'b010_1110110: q = 4'b0000;
-		11'b010_1110111: q = 4'b0000;
-		11'b010_1111000: q = 4'b0000;
-		11'b010_1111001: q = 4'b0000;
-		11'b010_1111010: q = 4'b0000;
-		11'b010_1111011: q = 4'b0000;
-		11'b010_1111100: q = 4'b0000;
-		11'b010_1111101: q = 4'b0000;
-		11'b010_1111110: q = 4'b0000;
-		11'b010_1111111: q = 4'b0000;
-		11'b011_0000000: q = 4'b0000;
-		11'b011_0000001: q = 4'b0000;
-		11'b011_0000010: q = 4'b0000;
-		11'b011_0000011: q = 4'b0000;
-		11'b011_0000100: q = 4'b0000;
-		11'b011_0000101: q = 4'b0000;
-		11'b011_0000110: q = 4'b0000;
-		11'b011_0000111: q = 4'b0000;
-		11'b011_0001000: q = 4'b0100;
-		11'b011_0001001: q = 4'b0100;
-		11'b011_0001010: q = 4'b0100;
-		11'b011_0001011: q = 4'b0100;
-		11'b011_0001100: q = 4'b0100;
-		11'b011_0001101: q = 4'b0100;
-		11'b011_0001110: q = 4'b0100;
-		11'b011_0001111: q = 4'b0100;
-		11'b011_0010000: q = 4'b0100;
-		11'b011_0010001: q = 4'b0100;
-		11'b011_0010010: q = 4'b0100;
-		11'b011_0010011: q = 4'b0100;
-		11'b011_0010100: q = 4'b0100;
-		11'b011_0010101: q = 4'b0100;
-		11'b011_0010110: q = 4'b0100;
-		11'b011_0010111: q = 4'b0100;
-		11'b011_0011000: q = 4'b0100;
-		11'b011_0011001: q = 4'b0100;
-		11'b011_0011010: q = 4'b0100;
-		11'b011_0011011: q = 4'b0100;
-		11'b011_0011100: q = 4'b0100;
-		11'b011_0011101: q = 4'b0100;
-		11'b011_0011110: q = 4'b0100;
-		11'b011_0011111: q = 4'b0100;
-		11'b011_0100000: q = 4'b1000;
-		11'b011_0100001: q = 4'b1000;
-		11'b011_0100010: q = 4'b1000;
-		11'b011_0100011: q = 4'b1000;
-		11'b011_0100100: q = 4'b1000;
-		11'b011_0100101: q = 4'b1000;
-		11'b011_0100110: q = 4'b1000;
-		11'b011_0100111: q = 4'b1000;
-		11'b011_0101000: q = 4'b1000;
-		11'b011_0101001: q = 4'b1000;
-		11'b011_0101010: q = 4'b1000;
-		11'b011_0101011: q = 4'b1000;
-		11'b011_0101100: q = 4'b1000;
-		11'b011_0101101: q = 4'b1000;
-		11'b011_0101110: q = 4'b1000;
-		11'b011_0101111: q = 4'b1000;
-		11'b011_0110000: q = 4'b1000;
-		11'b011_0110001: q = 4'b1000;
-		11'b011_0110010: q = 4'b1000;
-		11'b011_0110011: q = 4'b1000;
-		11'b011_0110100: q = 4'b1000;
-		11'b011_0110101: q = 4'b1000;
-		11'b011_0110110: q = 4'b1000;
-		11'b011_0110111: q = 4'b1000;
-		11'b011_0111000: q = 4'b1000;
-		11'b011_0111001: q = 4'b1000;
-		11'b011_0111010: q = 4'b1000;
-		11'b011_0111011: q = 4'b1000;
-		11'b011_0111100: q = 4'b1000;
-		11'b011_0111101: q = 4'b1000;
-		11'b011_0111110: q = 4'b1000;
-		11'b011_0111111: q = 4'b1000;
-		11'b011_1000000: q = 4'b0001;
-		11'b011_1000001: q = 4'b0001;
-		11'b011_1000010: q = 4'b0001;
-		11'b011_1000011: q = 4'b0001;
-		11'b011_1000100: q = 4'b0001;
-		11'b011_1000101: q = 4'b0001;
-		11'b011_1000110: q = 4'b0001;
-		11'b011_1000111: q = 4'b0001;
-		11'b011_1001000: q = 4'b0001;
-		11'b011_1001001: q = 4'b0001;
-		11'b011_1001010: q = 4'b0001;
-		11'b011_1001011: q = 4'b0001;
-		11'b011_1001100: q = 4'b0001;
-		11'b011_1001101: q = 4'b0001;
-		11'b011_1001110: q = 4'b0001;
-		11'b011_1001111: q = 4'b0001;
-		11'b011_1010000: q = 4'b0001;
-		11'b011_1010001: q = 4'b0001;
-		11'b011_1010010: q = 4'b0001;
-		11'b011_1010011: q = 4'b0001;
-		11'b011_1010100: q = 4'b0001;
-		11'b011_1010101: q = 4'b0001;
-		11'b011_1010110: q = 4'b0001;
-		11'b011_1010111: q = 4'b0001;
-		11'b011_1011000: q = 4'b0001;
-		11'b011_1011001: q = 4'b0001;
-		11'b011_1011010: q = 4'b0001;
-		11'b011_1011011: q = 4'b0001;
-		11'b011_1011100: q = 4'b0001;
-		11'b011_1011101: q = 4'b0001;
-		11'b011_1011110: q = 4'b0010;
-		11'b011_1011111: q = 4'b0010;
-		11'b011_1100000: q = 4'b0010;
-		11'b011_1100001: q = 4'b0010;
-		11'b011_1100010: q = 4'b0010;
-		11'b011_1100011: q = 4'b0010;
-		11'b011_1100100: q = 4'b0010;
-		11'b011_1100101: q = 4'b0010;
-		11'b011_1100110: q = 4'b0010;
-		11'b011_1100111: q = 4'b0010;
-		11'b011_1101000: q = 4'b0010;
-		11'b011_1101001: q = 4'b0010;
-		11'b011_1101010: q = 4'b0010;
-		11'b011_1101011: q = 4'b0010;
-		11'b011_1101100: q = 4'b0010;
-		11'b011_1101101: q = 4'b0010;
-		11'b011_1101110: q = 4'b0010;
-		11'b011_1101111: q = 4'b0010;
-		11'b011_1110000: q = 4'b0010;
-		11'b011_1110001: q = 4'b0010;
-		11'b011_1110010: q = 4'b0010;
-		11'b011_1110011: q = 4'b0010;
-		11'b011_1110100: q = 4'b0000;
-		11'b011_1110101: q = 4'b0000;
-		11'b011_1110110: q = 4'b0000;
-		11'b011_1110111: q = 4'b0000;
-		11'b011_1111000: q = 4'b0000;
-		11'b011_1111001: q = 4'b0000;
-		11'b011_1111010: q = 4'b0000;
-		11'b011_1111011: q = 4'b0000;
-		11'b011_1111100: q = 4'b0000;
-		11'b011_1111101: q = 4'b0000;
-		11'b011_1111110: q = 4'b0000;
-		11'b011_1111111: q = 4'b0000;
-		11'b100_0000000: q = 4'b0000;
-		11'b100_0000001: q = 4'b0000;
-		11'b100_0000010: q = 4'b0000;
-		11'b100_0000011: q = 4'b0000;
-		11'b100_0000100: q = 4'b0000;
-		11'b100_0000101: q = 4'b0000;
-		11'b100_0000110: q = 4'b0000;
-		11'b100_0000111: q = 4'b0000;
-		11'b100_0001000: q = 4'b0000;
-		11'b100_0001001: q = 4'b0000;
-		11'b100_0001010: q = 4'b0000;
-		11'b100_0001011: q = 4'b0000;
-		11'b100_0001100: q = 4'b0100;
-		11'b100_0001101: q = 4'b0100;
-		11'b100_0001110: q = 4'b0100;
-		11'b100_0001111: q = 4'b0100;
-		11'b100_0010000: q = 4'b0100;
-		11'b100_0010001: q = 4'b0100;
-		11'b100_0010010: q = 4'b0100;
-		11'b100_0010011: q = 4'b0100;
-		11'b100_0010100: q = 4'b0100;
-		11'b100_0010101: q = 4'b0100;
-		11'b100_0010110: q = 4'b0100;
-		11'b100_0010111: q = 4'b0100;
-		11'b100_0011000: q = 4'b0100;
-		11'b100_0011001: q = 4'b0100;
-		11'b100_0011010: q = 4'b0100;
-		11'b100_0011011: q = 4'b0100;
-		11'b100_0011100: q = 4'b0100;
-		11'b100_0011101: q = 4'b0100;
-		11'b100_0011110: q = 4'b0100;
-		11'b100_0011111: q = 4'b0100;
-		11'b100_0100000: q = 4'b0100;
-		11'b100_0100001: q = 4'b0100;
-		11'b100_0100010: q = 4'b0100;
-		11'b100_0100011: q = 4'b0100;
-		11'b100_0100100: q = 4'b1000;
-		11'b100_0100101: q = 4'b1000;
-		11'b100_0100110: q = 4'b1000;
-		11'b100_0100111: q = 4'b1000;
-		11'b100_0101000: q = 4'b1000;
-		11'b100_0101001: q = 4'b1000;
-		11'b100_0101010: q = 4'b1000;
-		11'b100_0101011: q = 4'b1000;
-		11'b100_0101100: q = 4'b1000;
-		11'b100_0101101: q = 4'b1000;
-		11'b100_0101110: q = 4'b1000;
-		11'b100_0101111: q = 4'b1000;
-		11'b100_0110000: q = 4'b1000;
-		11'b100_0110001: q = 4'b1000;
-		11'b100_0110010: q = 4'b1000;
-		11'b100_0110011: q = 4'b1000;
-		11'b100_0110100: q = 4'b1000;
-		11'b100_0110101: q = 4'b1000;
-		11'b100_0110110: q = 4'b1000;
-		11'b100_0110111: q = 4'b1000;
-		11'b100_0111000: q = 4'b1000;
-		11'b100_0111001: q = 4'b1000;
-		11'b100_0111010: q = 4'b1000;
-		11'b100_0111011: q = 4'b1000;
-		11'b100_0111100: q = 4'b1000;
-		11'b100_0111101: q = 4'b1000;
-		11'b100_0111110: q = 4'b1000;
-		11'b100_0111111: q = 4'b1000;
-		11'b100_1000000: q = 4'b0001;
-		11'b100_1000001: q = 4'b0001;
-		11'b100_1000010: q = 4'b0001;
-		11'b100_1000011: q = 4'b0001;
-		11'b100_1000100: q = 4'b0001;
-		11'b100_1000101: q = 4'b0001;
-		11'b100_1000110: q = 4'b0001;
-		11'b100_1000111: q = 4'b0001;
-		11'b100_1001000: q = 4'b0001;
-		11'b100_1001001: q = 4'b0001;
-		11'b100_1001010: q = 4'b0001;
-		11'b100_1001011: q = 4'b0001;
-		11'b100_1001100: q = 4'b0001;
-		11'b100_1001101: q = 4'b0001;
-		11'b100_1001110: q = 4'b0001;
-		11'b100_1001111: q = 4'b0001;
-		11'b100_1010000: q = 4'b0001;
-		11'b100_1010001: q = 4'b0001;
-		11'b100_1010010: q = 4'b0001;
-		11'b100_1010011: q = 4'b0001;
-		11'b100_1010100: q = 4'b0001;
-		11'b100_1010101: q = 4'b0001;
-		11'b100_1010110: q = 4'b0001;
-		11'b100_1010111: q = 4'b0001;
-		11'b100_1011000: q = 4'b0001;
-		11'b100_1011001: q = 4'b0001;
-		11'b100_1011010: q = 4'b0001;
-		11'b100_1011011: q = 4'b0001;
-		11'b100_1011100: q = 4'b0010;
-		11'b100_1011101: q = 4'b0010;
-		11'b100_1011110: q = 4'b0010;
-		11'b100_1011111: q = 4'b0010;
-		11'b100_1100000: q = 4'b0010;
-		11'b100_1100001: q = 4'b0010;
-		11'b100_1100010: q = 4'b0010;
-		11'b100_1100011: q = 4'b0010;
-		11'b100_1100100: q = 4'b0010;
-		11'b100_1100101: q = 4'b0010;
-		11'b100_1100110: q = 4'b0010;
-		11'b100_1100111: q = 4'b0010;
-		11'b100_1101000: q = 4'b0010;
-		11'b100_1101001: q = 4'b0010;
-		11'b100_1101010: q = 4'b0010;
-		11'b100_1101011: q = 4'b0010;
-		11'b100_1101100: q = 4'b0010;
-		11'b100_1101101: q = 4'b0010;
-		11'b100_1101110: q = 4'b0010;
-		11'b100_1101111: q = 4'b0010;
-		11'b100_1110000: q = 4'b0010;
-		11'b100_1110001: q = 4'b0010;
-		11'b100_1110010: q = 4'b0010;
-		11'b100_1110011: q = 4'b0010;
-		11'b100_1110100: q = 4'b0000;
-		11'b100_1110101: q = 4'b0000;
-		11'b100_1110110: q = 4'b0000;
-		11'b100_1110111: q = 4'b0000;
-		11'b100_1111000: q = 4'b0000;
-		11'b100_1111001: q = 4'b0000;
-		11'b100_1111010: q = 4'b0000;
-		11'b100_1111011: q = 4'b0000;
-		11'b100_1111100: q = 4'b0000;
-		11'b100_1111101: q = 4'b0000;
-		11'b100_1111110: q = 4'b0000;
-		11'b100_1111111: q = 4'b0000;
-		11'b101_0000000: q = 4'b0000;
-		11'b101_0000001: q = 4'b0000;
-		11'b101_0000010: q = 4'b0000;
-		11'b101_0000011: q = 4'b0000;
-		11'b101_0000100: q = 4'b0000;
-		11'b101_0000101: q = 4'b0000;
-		11'b101_0000110: q = 4'b0000;
-		11'b101_0000111: q = 4'b0000;
-		11'b101_0001000: q = 4'b0000;
-		11'b101_0001001: q = 4'b0000;
-		11'b101_0001010: q = 4'b0000;
-		11'b101_0001011: q = 4'b0000;
-		11'b101_0001100: q = 4'b0100;
-		11'b101_0001101: q = 4'b0100;
-		11'b101_0001110: q = 4'b0100;
-		11'b101_0001111: q = 4'b0100;
-		11'b101_0010000: q = 4'b0100;
-		11'b101_0010001: q = 4'b0100;
-		11'b101_0010010: q = 4'b0100;
-		11'b101_0010011: q = 4'b0100;
-		11'b101_0010100: q = 4'b0100;
-		11'b101_0010101: q = 4'b0100;
-		11'b101_0010110: q = 4'b0100;
-		11'b101_0010111: q = 4'b0100;
-		11'b101_0011000: q = 4'b0100;
-		11'b101_0011001: q = 4'b0100;
-		11'b101_0011010: q = 4'b0100;
-		11'b101_0011011: q = 4'b0100;
-		11'b101_0011100: q = 4'b0100;
-		11'b101_0011101: q = 4'b0100;
-		11'b101_0011110: q = 4'b0100;
-		11'b101_0011111: q = 4'b0100;
-		11'b101_0100000: q = 4'b0100;
-		11'b101_0100001: q = 4'b0100;
-		11'b101_0100010: q = 4'b0100;
-		11'b101_0100011: q = 4'b0100;
-		11'b101_0100100: q = 4'b0100;
-		11'b101_0100101: q = 4'b0100;
-		11'b101_0100110: q = 4'b0100;
-		11'b101_0100111: q = 4'b0100;
-		11'b101_0101000: q = 4'b1000;
-		11'b101_0101001: q = 4'b1000;
-		11'b101_0101010: q = 4'b1000;
-		11'b101_0101011: q = 4'b1000;
-		11'b101_0101100: q = 4'b1000;
-		11'b101_0101101: q = 4'b1000;
-		11'b101_0101110: q = 4'b1000;
-		11'b101_0101111: q = 4'b1000;
-		11'b101_0110000: q = 4'b1000;
-		11'b101_0110001: q = 4'b1000;
-		11'b101_0110010: q = 4'b1000;
-		11'b101_0110011: q = 4'b1000;
-		11'b101_0110100: q = 4'b1000;
-		11'b101_0110101: q = 4'b1000;
-		11'b101_0110110: q = 4'b1000;
-		11'b101_0110111: q = 4'b1000;
-		11'b101_0111000: q = 4'b1000;
-		11'b101_0111001: q = 4'b1000;
-		11'b101_0111010: q = 4'b1000;
-		11'b101_0111011: q = 4'b1000;
-		11'b101_0111100: q = 4'b1000;
-		11'b101_0111101: q = 4'b1000;
-		11'b101_0111110: q = 4'b1000;
-		11'b101_0111111: q = 4'b1000;
-		11'b101_1000000: q = 4'b0001;
-		11'b101_1000001: q = 4'b0001;
-		11'b101_1000010: q = 4'b0001;
-		11'b101_1000011: q = 4'b0001;
-		11'b101_1000100: q = 4'b0001;
-		11'b101_1000101: q = 4'b0001;
-		11'b101_1000110: q = 4'b0001;
-		11'b101_1000111: q = 4'b0001;
-		11'b101_1001000: q = 4'b0001;
-		11'b101_1001001: q = 4'b0001;
-		11'b101_1001010: q = 4'b0001;
-		11'b101_1001011: q = 4'b0001;
-		11'b101_1001100: q = 4'b0001;
-		11'b101_1001101: q = 4'b0001;
-		11'b101_1001110: q = 4'b0001;
-		11'b101_1001111: q = 4'b0001;
-		11'b101_1010000: q = 4'b0001;
-		11'b101_1010001: q = 4'b0001;
-		11'b101_1010010: q = 4'b0001;
-		11'b101_1010011: q = 4'b0001;
-		11'b101_1010100: q = 4'b0001;
-		11'b101_1010101: q = 4'b0001;
-		11'b101_1010110: q = 4'b0001;
-		11'b101_1010111: q = 4'b0001;
-		11'b101_1011000: q = 4'b0010;
-		11'b101_1011001: q = 4'b0010;
-		11'b101_1011010: q = 4'b0010;
-		11'b101_1011011: q = 4'b0010;
-		11'b101_1011100: q = 4'b0010;
-		11'b101_1011101: q = 4'b0010;
-		11'b101_1011110: q = 4'b0010;
-		11'b101_1011111: q = 4'b0010;
-		11'b101_1100000: q = 4'b0010;
-		11'b101_1100001: q = 4'b0010;
-		11'b101_1100010: q = 4'b0010;
-		11'b101_1100011: q = 4'b0010;
-		11'b101_1100100: q = 4'b0010;
-		11'b101_1100101: q = 4'b0010;
-		11'b101_1100110: q = 4'b0010;
-		11'b101_1100111: q = 4'b0010;
-		11'b101_1101000: q = 4'b0010;
-		11'b101_1101001: q = 4'b0010;
-		11'b101_1101010: q = 4'b0010;
-		11'b101_1101011: q = 4'b0010;
-		11'b101_1101100: q = 4'b0010;
-		11'b101_1101101: q = 4'b0010;
-		11'b101_1101110: q = 4'b0010;
-		11'b101_1101111: q = 4'b0010;
-		11'b101_1110000: q = 4'b0000;
-		11'b101_1110001: q = 4'b0000;
-		11'b101_1110010: q = 4'b0000;
-		11'b101_1110011: q = 4'b0000;
-		11'b101_1110100: q = 4'b0000;
-		11'b101_1110101: q = 4'b0000;
-		11'b101_1110110: q = 4'b0000;
-		11'b101_1110111: q = 4'b0000;
-		11'b101_1111000: q = 4'b0000;
-		11'b101_1111001: q = 4'b0000;
-		11'b101_1111010: q = 4'b0000;
-		11'b101_1111011: q = 4'b0000;
-		11'b101_1111100: q = 4'b0000;
-		11'b101_1111101: q = 4'b0000;
-		11'b101_1111110: q = 4'b0000;
-		11'b101_1111111: q = 4'b0000;
-		11'b110_0000000: q = 4'b0000;
-		11'b110_0000001: q = 4'b0000;
-		11'b110_0000010: q = 4'b0000;
-		11'b110_0000011: q = 4'b0000;
-		11'b110_0000100: q = 4'b0000;
-		11'b110_0000101: q = 4'b0000;
-		11'b110_0000110: q = 4'b0000;
-		11'b110_0000111: q = 4'b0000;
-		11'b110_0001000: q = 4'b0000;
-		11'b110_0001001: q = 4'b0000;
-		11'b110_0001010: q = 4'b0000;
-		11'b110_0001011: q = 4'b0000;
-		11'b110_0001100: q = 4'b0000;
-		11'b110_0001101: q = 4'b0000;
-		11'b110_0001110: q = 4'b0000;
-		11'b110_0001111: q = 4'b0000;
-		11'b110_0010000: q = 4'b0100;
-		11'b110_0010001: q = 4'b0100;
-		11'b110_0010010: q = 4'b0100;
-		11'b110_0010011: q = 4'b0100;
-		11'b110_0010100: q = 4'b0100;
-		11'b110_0010101: q = 4'b0100;
-		11'b110_0010110: q = 4'b0100;
-		11'b110_0010111: q = 4'b0100;
-		11'b110_0011000: q = 4'b0100;
-		11'b110_0011001: q = 4'b0100;
-		11'b110_0011010: q = 4'b0100;
-		11'b110_0011011: q = 4'b0100;
-		11'b110_0011100: q = 4'b0100;
-		11'b110_0011101: q = 4'b0100;
-		11'b110_0011110: q = 4'b0100;
-		11'b110_0011111: q = 4'b0100;
-		11'b110_0100000: q = 4'b0100;
-		11'b110_0100001: q = 4'b0100;
-		11'b110_0100010: q = 4'b0100;
-		11'b110_0100011: q = 4'b0100;
-		11'b110_0100100: q = 4'b0100;
-		11'b110_0100101: q = 4'b0100;
-		11'b110_0100110: q = 4'b0100;
-		11'b110_0100111: q = 4'b0100;
-		11'b110_0101000: q = 4'b1000;
-		11'b110_0101001: q = 4'b1000;
-		11'b110_0101010: q = 4'b1000;
-		11'b110_0101011: q = 4'b1000;
-		11'b110_0101100: q = 4'b1000;
-		11'b110_0101101: q = 4'b1000;
-		11'b110_0101110: q = 4'b1000;
-		11'b110_0101111: q = 4'b1000;
-		11'b110_0110000: q = 4'b1000;
-		11'b110_0110001: q = 4'b1000;
-		11'b110_0110010: q = 4'b1000;
-		11'b110_0110011: q = 4'b1000;
-		11'b110_0110100: q = 4'b1000;
-		11'b110_0110101: q = 4'b1000;
-		11'b110_0110110: q = 4'b1000;
-		11'b110_0110111: q = 4'b1000;
-		11'b110_0111000: q = 4'b1000;
-		11'b110_0111001: q = 4'b1000;
-		11'b110_0111010: q = 4'b1000;
-		11'b110_0111011: q = 4'b1000;
-		11'b110_0111100: q = 4'b1000;
-		11'b110_0111101: q = 4'b1000;
-		11'b110_0111110: q = 4'b1000;
-		11'b110_0111111: q = 4'b1000;
-		11'b110_1000000: q = 4'b0001;
-		11'b110_1000001: q = 4'b0001;
-		11'b110_1000010: q = 4'b0001;
-		11'b110_1000011: q = 4'b0001;
-		11'b110_1000100: q = 4'b0001;
-		11'b110_1000101: q = 4'b0001;
-		11'b110_1000110: q = 4'b0001;
-		11'b110_1000111: q = 4'b0001;
-		11'b110_1001000: q = 4'b0001;
-		11'b110_1001001: q = 4'b0001;
-		11'b110_1001010: q = 4'b0001;
-		11'b110_1001011: q = 4'b0001;
-		11'b110_1001100: q = 4'b0001;
-		11'b110_1001101: q = 4'b0001;
-		11'b110_1001110: q = 4'b0001;
-		11'b110_1001111: q = 4'b0001;
-		11'b110_1010000: q = 4'b0001;
-		11'b110_1010001: q = 4'b0001;
-		11'b110_1010010: q = 4'b0001;
-		11'b110_1010011: q = 4'b0001;
-		11'b110_1010100: q = 4'b0010;
-		11'b110_1010101: q = 4'b0010;
-		11'b110_1010110: q = 4'b0010;
-		11'b110_1010111: q = 4'b0010;
-		11'b110_1011000: q = 4'b0010;
-		11'b110_1011001: q = 4'b0010;
-		11'b110_1011010: q = 4'b0010;
-		11'b110_1011011: q = 4'b0010;
-		11'b110_1011100: q = 4'b0010;
-		11'b110_1011101: q = 4'b0010;
-		11'b110_1011110: q = 4'b0010;
-		11'b110_1011111: q = 4'b0010;
-		11'b110_1100000: q = 4'b0010;
-		11'b110_1100001: q = 4'b0010;
-		11'b110_1100010: q = 4'b0010;
-		11'b110_1100011: q = 4'b0010;
-		11'b110_1100100: q = 4'b0010;
-		11'b110_1100101: q = 4'b0010;
-		11'b110_1100110: q = 4'b0010;
-		11'b110_1100111: q = 4'b0010;
-		11'b110_1101000: q = 4'b0010;
-		11'b110_1101001: q = 4'b0010;
-		11'b110_1101010: q = 4'b0010;
-		11'b110_1101011: q = 4'b0010;
-		11'b110_1101100: q = 4'b0010;
-		11'b110_1101101: q = 4'b0010;
-		11'b110_1101110: q = 4'b0010;
-		11'b110_1101111: q = 4'b0010;
-		11'b110_1110000: q = 4'b0000;
-		11'b110_1110001: q = 4'b0000;
-		11'b110_1110010: q = 4'b0000;
-		11'b110_1110011: q = 4'b0000;
-		11'b110_1110100: q = 4'b0000;
-		11'b110_1110101: q = 4'b0000;
-		11'b110_1110110: q = 4'b0000;
-		11'b110_1110111: q = 4'b0000;
-		11'b110_1111000: q = 4'b0000;
-		11'b110_1111001: q = 4'b0000;
-		11'b110_1111010: q = 4'b0000;
-		11'b110_1111011: q = 4'b0000;
-		11'b110_1111100: q = 4'b0000;
-		11'b110_1111101: q = 4'b0000;
-		11'b110_1111110: q = 4'b0000;
-		11'b110_1111111: q = 4'b0000;
-		11'b111_0000000: q = 4'b0000;
-		11'b111_0000001: q = 4'b0000;
-		11'b111_0000010: q = 4'b0000;
-		11'b111_0000011: q = 4'b0000;
-		11'b111_0000100: q = 4'b0000;
-		11'b111_0000101: q = 4'b0000;
-		11'b111_0000110: q = 4'b0000;
-		11'b111_0000111: q = 4'b0000;
-		11'b111_0001000: q = 4'b0000;
-		11'b111_0001001: q = 4'b0000;
-		11'b111_0001010: q = 4'b0000;
-		11'b111_0001011: q = 4'b0000;
-		11'b111_0001100: q = 4'b0000;
-		11'b111_0001101: q = 4'b0000;
-		11'b111_0001110: q = 4'b0000;
-		11'b111_0001111: q = 4'b0000;
-		11'b111_0010000: q = 4'b0100;
-		11'b111_0010001: q = 4'b0100;
-		11'b111_0010010: q = 4'b0100;
-		11'b111_0010011: q = 4'b0100;
-		11'b111_0010100: q = 4'b0100;
-		11'b111_0010101: q = 4'b0100;
-		11'b111_0010110: q = 4'b0100;
-		11'b111_0010111: q = 4'b0100;
-		11'b111_0011000: q = 4'b0100;
-		11'b111_0011001: q = 4'b0100;
-		11'b111_0011010: q = 4'b0100;
-		11'b111_0011011: q = 4'b0100;
-		11'b111_0011100: q = 4'b0100;
-		11'b111_0011101: q = 4'b0100;
-		11'b111_0011110: q = 4'b0100;
-		11'b111_0011111: q = 4'b0100;
-		11'b111_0100000: q = 4'b0100;
-		11'b111_0100001: q = 4'b0100;
-		11'b111_0100010: q = 4'b0100;
-		11'b111_0100011: q = 4'b0100;
-		11'b111_0100100: q = 4'b0100;
-		11'b111_0100101: q = 4'b0100;
-		11'b111_0100110: q = 4'b0100;
-		11'b111_0100111: q = 4'b0100;
-		11'b111_0101000: q = 4'b0100;
-		11'b111_0101001: q = 4'b0100;
-		11'b111_0101010: q = 4'b0100;
-		11'b111_0101011: q = 4'b0100;
-		11'b111_0101100: q = 4'b1000;
-		11'b111_0101101: q = 4'b1000;
-		11'b111_0101110: q = 4'b1000;
-		11'b111_0101111: q = 4'b1000;
-		11'b111_0110000: q = 4'b1000;
-		11'b111_0110001: q = 4'b1000;
-		11'b111_0110010: q = 4'b1000;
-		11'b111_0110011: q = 4'b1000;
-		11'b111_0110100: q = 4'b1000;
-		11'b111_0110101: q = 4'b1000;
-		11'b111_0110110: q = 4'b1000;
-		11'b111_0110111: q = 4'b1000;
-		11'b111_0111000: q = 4'b1000;
-		11'b111_0111001: q = 4'b1000;
-		11'b111_0111010: q = 4'b1000;
-		11'b111_0111011: q = 4'b1000;
-		11'b111_0111100: q = 4'b1000;
-		11'b111_0111101: q = 4'b1000;
-		11'b111_0111110: q = 4'b1000;
-		11'b111_0111111: q = 4'b1000;
-		11'b111_1000000: q = 4'b0001;
-		11'b111_1000001: q = 4'b0001;
-		11'b111_1000010: q = 4'b0001;
-		11'b111_1000011: q = 4'b0001;
-		11'b111_1000100: q = 4'b0001;
-		11'b111_1000101: q = 4'b0001;
-		11'b111_1000110: q = 4'b0001;
-		11'b111_1000111: q = 4'b0001;
-		11'b111_1001000: q = 4'b0001;
-		11'b111_1001001: q = 4'b0001;
-		11'b111_1001010: q = 4'b0001;
-		11'b111_1001011: q = 4'b0001;
-		11'b111_1001100: q = 4'b0001;
-		11'b111_1001101: q = 4'b0001;
-		11'b111_1001110: q = 4'b0001;
-		11'b111_1001111: q = 4'b0001;
-		11'b111_1010000: q = 4'b0001;
-		11'b111_1010001: q = 4'b0001;
-		11'b111_1010010: q = 4'b0010;
-		11'b111_1010011: q = 4'b0010;
-		11'b111_1010100: q = 4'b0010;
-		11'b111_1010101: q = 4'b0010;
-		11'b111_1010110: q = 4'b0010;
-		11'b111_1010111: q = 4'b0010;
-		11'b111_1011000: q = 4'b0010;
-		11'b111_1011001: q = 4'b0010;
-		11'b111_1011010: q = 4'b0010;
-		11'b111_1011011: q = 4'b0010;
-		11'b111_1011100: q = 4'b0010;
-		11'b111_1011101: q = 4'b0010;
-		11'b111_1011110: q = 4'b0010;
-		11'b111_1011111: q = 4'b0010;
-		11'b111_1100000: q = 4'b0010;
-		11'b111_1100001: q = 4'b0010;
-		11'b111_1100010: q = 4'b0010;
-		11'b111_1100011: q = 4'b0010;
-		11'b111_1100100: q = 4'b0010;
-		11'b111_1100101: q = 4'b0010;
-		11'b111_1100110: q = 4'b0010;
-		11'b111_1100111: q = 4'b0010;
-		11'b111_1101000: q = 4'b0010;
-		11'b111_1101001: q = 4'b0010;
-		11'b111_1101010: q = 4'b0010;
-		11'b111_1101011: q = 4'b0010;
-		11'b111_1101100: q = 4'b0010;
-		11'b111_1101101: q = 4'b0010;
-		11'b111_1101110: q = 4'b0010;
-		11'b111_1101111: q = 4'b0010;
-		11'b111_1110000: q = 4'b0000;
-		11'b111_1110001: q = 4'b0000;
-		11'b111_1110010: q = 4'b0000;
-		11'b111_1110011: q = 4'b0000;
-		11'b111_1110100: q = 4'b0000;
-		11'b111_1110101: q = 4'b0000;
-		11'b111_1110110: q = 4'b0000;
-		11'b111_1110111: q = 4'b0000;
-		11'b111_1111000: q = 4'b0000;
-		11'b111_1111001: q = 4'b0000;
-		11'b111_1111010: q = 4'b0000;
-		11'b111_1111011: q = 4'b0000;
-		11'b111_1111100: q = 4'b0000;
-		11'b111_1111101: q = 4'b0000;
-		11'b111_1111110: q = 4'b0000;
-		11'b111_1111111: q = 4'b0000;
-	endcase
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 3e41c16c..4cd23488 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -30,7 +30,7 @@
 
 `include "wally-config.vh"
 `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
-`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN))
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1))
 
 module srt (
   input  logic clk,
@@ -164,7 +164,7 @@ module srtpreproc (
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
   // Number of cycles of divider
-  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2);
+  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2);
 endmodule
 
 /////////////////////////////////

From 77ea4e47cb7425186a98646684baa6d4db48cd7a Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 13 Jul 2022 15:01:38 -0700
Subject: [PATCH 18/36] removed minus 1 case in rounding

---
 pipelined/src/fpu/divsqrt.sv        |  6 ++--
 pipelined/src/fpu/fma.sv            | 41 +++++++++++-----------
 pipelined/src/fpu/fmashiftcalc.sv   |  7 ++--
 pipelined/src/fpu/fpu.sv            |  5 ++-
 pipelined/src/fpu/lzacorrection.sv  |  3 +-
 pipelined/src/fpu/postprocess.sv    |  7 ++--
 pipelined/src/fpu/round.sv          | 54 ++++++++++-------------------
 pipelined/src/fpu/srt-radix4.sv     |  5 +--
 pipelined/src/fpu/srtfsm.sv         |  4 +--
 pipelined/testbench/testbench-fp.sv |  8 ++---
 10 files changed, 60 insertions(+), 80 deletions(-)

diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 8420baa1..7e240420 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -43,7 +43,6 @@ module divsqrt(
   input  logic StallM,
   input logic StallE,
   output logic DivStickyM,
-  output logic DivNegStickyM,
   output logic DivBusy,
   output logic DivDone,
   output logic [`NE+1:0] DivCalcExpM,
@@ -58,11 +57,12 @@ module divsqrt(
   logic [`DIVLEN-1:0] X;
   logic [`DIVLEN-1:0] Dpreproc;
   logic [`DURLEN-1:0] Dur;
+  logic NegSticky;
 
   srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+                .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 57b053da..039876e9 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -70,20 +70,21 @@ module fma(
     ///////////////////////////////////////////////////////////////////////////////
     // Alignment shifter
     ///////////////////////////////////////////////////////////////////////////////
-
-    align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
-                        .Am, .ZmSticky, .KillProd);
-                        
     // calculate the signs and take the opperation into account
     sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
 
+    align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
+                .Ps, .As, .Am, .ZmSticky, .KillProd);
+                        
+
+
     // ///////////////////////////////////////////////////////////////////////////////
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
     
-    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
+    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
 
 
@@ -142,6 +143,7 @@ endmodule
 
 
 module align(
+    input logic                 As, Ps,
     input logic  [`NE-1:0]      Xe, Ye, Ze,      // biased exponents in B(NE.0) format
     input logic  [`NF:0]        Zm,      // significand in U(0.NF) format]
     input logic                 XZero, YZero, ZZero, // is the input zero
@@ -172,7 +174,7 @@ module align(
     // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
     assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
     
-    assign KillProd = ACnt[`NE+1]|XZero|YZero;
+    assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
     assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
 
     always_comb
@@ -183,7 +185,7 @@ module align(
         //          |   54'b0    |  106'b(product)  | 2'b0 |
         //  | addnend |
         if (KillProd) begin
-            ZmShifted = ZmPreshifted;
+            ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)};
             ZmSticky = ~(XZero|YZero);
 
         // If the addend is too small to effect the addition        
@@ -221,6 +223,7 @@ module add(
     input logic  [2*`NF+1:0]    Pm,       // the product's mantissa
     input logic                 Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
     input logic                 KillProd,      // should the product be set to 0
+    input logic                 ZmSticky,
     input logic                 XZero, YZero, // is the input zero
     output logic [3*`NF+6:0]    AmInv,  // aligned addend possibly inverted
     output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
@@ -243,13 +246,14 @@ module add(
     assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
     // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
     assign PmKilled = Pm&{2*`NF+2{~KillProd}};
-
-
-
     // Do the addition
     //      - calculate a positive and negitive sum in parallel
-    assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA};
-    assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
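+    //  sticky correction  | addend sticky (ZmSticky&~KillProd) | product sticky (ZmSticky&KillProd)
+    //  PreSum (when InvA) | -1 = don't add the 1               | +1 = add 2
+    //  NegPreSum          | +1 = add 2                         | -1 = don't add 1 (add 0)
+    // for NegPreSum the product is set to -1 (all ones) whenever the product is killed, therefore add 1 (no sticky), 2 or 0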
+    assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))};
+    assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)};
      
     // Is the sum negitive
     assign NegSum = PreSum[3*`NF+6];
@@ -261,7 +265,7 @@ endmodule
 
 module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
     input logic  [3*`NF+6:0] A,     // addend
-    input logic  [2*`NF+1:0] P,     // product
+    input logic  [2*`NF+3:0] P,     // product
     output logic [$clog2(3*`NF+7)-1:0]       NCnt   // normalization shift count for the positive result
     ); 
     
@@ -273,12 +277,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
     assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
     assign G[3*`NF+6:2*`NF+4] = 0;
     assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
-    assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
-    assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
-    assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
-    assign T[1:0] = A[1:0];
-    assign G[1:0] = 0;
-    assign Z[1:0] = ~A[1:0];
+    assign T[2*`NF+3:0] = A[2*`NF+3:0]^P;
+    assign G[2*`NF+3:0] = A[2*`NF+3:0]&P;
+    assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P;
 
 
     // Apply function to determine Leading pattern
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 3c286b50..ae974eb0 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -35,7 +35,6 @@ module fmashiftcalc(
     input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
-    input logic 			            ZDenorm,
     output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
@@ -54,7 +53,7 @@ module fmashiftcalc(
 
     // calculate the sum's exponent
     //                                                                      ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
-    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
+    assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
@@ -149,9 +148,9 @@ module fmashiftcalc(
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
-    assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift;
+    assign FmaShiftAmt = FmaNCnt+DenormShift;
 endmodule
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index bd018253..5428481d 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -127,7 +127,6 @@ module fpu (
    //divide signals
    logic [`QLEN-1:0] QuotM;
    logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
-   logic DivNegStickyE, DivNegStickyM;
    logic DivStickyE, DivStickyM;
    logic DivDoneM;
    logic [`DURLEN-1:0] EarlyTermShiftM;
@@ -288,7 +287,7 @@ module fpu (
    //       .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
    divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
-                  .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
+                  .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
                   .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
    // other FP execution units
    fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
@@ -384,7 +383,7 @@ module fpu (
    postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
                            .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
                            .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
-                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM),
+                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
                            .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
                            .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv
index 17db0c0b..eb9d3559 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@@ -37,7 +37,6 @@ module lzacorrection(
     input logic  [`NE+1:0]          DivDenormShift,
     input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
-    input logic                     FmaKillProd,  // is the product set to zero
     input logic                     FmaSZero,
     output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
     output logic [`NE+1:0]          DivCorrExp,
@@ -59,7 +58,7 @@ module lzacorrection(
     assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}};
+    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 30945532..d7fcb2a0 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -56,7 +56,6 @@ module postprocess (
     //divide signals
     input logic  [`DURLEN-1:0]              DivEarlyTermShift,
     input logic                             DivSticky,
-    input logic                             DivNegSticky,
     input logic                             DivDone,
     input logic  [`NE+1:0]                  DivCalcExp,
     input logic  [`QLEN-1:0]                Quot,
@@ -153,7 +152,7 @@ module postprocess (
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
-                          .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+                          .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
     divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
@@ -183,7 +182,7 @@ module postprocess (
     
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
-    lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp,
+    lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
                                 .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
                                 .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
 
@@ -203,7 +202,7 @@ module postprocess (
 
     round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
                 .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
-                .DivSticky, .DivNegSticky, .DivDone,
+                .DivSticky, .DivDone,
                 .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
 
     ///////////////////////////////////////////////////////////////////////////////
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index c73edc08..e2b9cb3e 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -55,7 +55,6 @@ module round(
     input logic  [`NE:0]            CvtCe,    // the calculated expoent
     input logic  [`NE+1:0]          DivCorrExp,    // the calculated expoent
     input logic                     DivSticky,             // sticky bit
-    input logic                     DivNegSticky,
     output logic                    UfPlus1,  // do you add or subtract on from the result
     output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
     output logic [`NF-1:0]          Rf,         // Result fraction
@@ -67,7 +66,6 @@ module round(
     output logic                    R, UfLSBRes // bits needed to calculate rounding
 );
     logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
-    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
     logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
     logic           NormSumSticky;  // normalized sum's sticky bit
     logic           UfSticky;   // sticky bit for underlow calculation
@@ -254,40 +252,25 @@ module round(
     assign S = UfSticky | UfRound;
 
 
-    // Deterimine if a small number was supposed to be subtrated
-    //  - for FMA or if division has a negitive sticky bit
-    assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound);
-    assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky;
-
-
     always_comb begin
         // Determine if you add 1
         case (Frm)
-            3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even
+            3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even
             3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down
-            3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up
-            3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude
+            3'b010: CalcPlus1 = Nsgn;//round down
+            3'b011: CalcPlus1 = ~Nsgn;//round up
+            3'b100: CalcPlus1 = R;//round to nearest max magnitude
             default: CalcPlus1 = 1'bx;
         endcase
         // Determine if you add 1 (for underflow flag)
         case (Frm)
-            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
+            3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even
             3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down
-            3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up
-            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
+            3'b010: UfCalcPlus1 = Nsgn;//round down
+            3'b011: UfCalcPlus1 = ~Nsgn;//round up
+            3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude
             default: UfCalcPlus1 = 1'bx;
         endcase
-        // Determine if you subtract 1
-        case (Frm)
-            3'b000: CalcMinus1 = 0;//round to nearest even
-            3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero
-            3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down
-            3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up
-            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
-            default: CalcMinus1 = 1'bx;
-        endcase
    
     end
 
@@ -295,26 +278,25 @@ module round(
     assign Plus1 = CalcPlus1 & (S | R);
     assign FpPlus1 = Plus1&~(ToInt&CvtOp);
     assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
-    assign Minus1 = CalcMinus1 & (S | R);
 
     // Compute rounded result
     if (`FPSIZES == 1) begin
-        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
+        assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
 
     end else if (`FPSIZES == 2) begin
         // \/FLEN+1
         //  | NE+2 |        NF      |
         //  '-NE+2-^----NF1----^
         // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-        assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
-                                   Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+        assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} :
+                                   {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
 
     end else if (`FPSIZES == 3) begin
         always_comb begin
             case (OutFmt)
-                `FMT:  RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
-                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
-                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
+                `FMT:  RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1};
+                `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+                `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
                 default: RoundAdd = (`FLEN+1)'(0);
             endcase
         end
@@ -322,10 +304,10 @@ module round(
     end else if (`FPSIZES == 4) begin        
         always_comb begin
             case (OutFmt)
-                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
-                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
-                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
-                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
+                2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
+                2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
+                2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
+                2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
             endcase
         end
 
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
index 5a7e96e2..b1bf6f56 100644
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@@ -40,6 +40,7 @@ module srtradix4(
   input logic [`DIVLEN-1:0] X,
   input logic [`DIVLEN-1:0] Dpreproc,
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input logic NegSticky,
   output logic [`QLEN-1:0] Quot,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
@@ -106,9 +107,9 @@ module srtradix4(
   // if starting a new divison set Q to 0 and QM to -1
   mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
   flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
-  flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
+  flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
 
-  assign Quot = Q[0];
+  assign Quot = NegSticky ? QM[0] : Q[0];
   assign FirstWS = WS[0];
   assign FirstWC = WC[0];
 
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 21e35c36..481b1b22 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -44,7 +44,7 @@ module srtfsm(
   output logic [`DURLEN-1:0] EarlyTermShiftE,
   output logic DivStickyE,
   output logic DivDone,
-  output logic DivNegStickyE,
+  output logic NegSticky,
   output logic DivBusy
   );
   
@@ -62,7 +62,7 @@ module srtfsm(
   assign DivStickyE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
-  assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
+  assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
   assign EarlyTermShiftE = step;
 
   always_ff @(posedge clk) begin
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 2aec1ab1..033045e7 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -681,7 +681,7 @@ module testbenchfp;
   postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
               .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
               .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
-              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky,
+              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
@@ -697,8 +697,8 @@ module testbenchfp;
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
   srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
   srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+                .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
                 .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
 
   assign CmpFlg[3:0] = 0;
@@ -854,7 +854,7 @@ end
 
     // check if result is correct
     //  - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);

From 7e163e22a3eb17a876f574089ec8722cc2e1140e Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 13 Jul 2022 15:28:22 -0700
Subject: [PATCH 19/36] some code cleanup

---
 pipelined/src/fpu/fma.sv         | 11 ++++-------
 pipelined/src/fpu/postprocess.sv |  7 +++----
 pipelined/src/fpu/round.sv       | 31 ++++++-------------------------
 3 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 039876e9..44cd3616 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -51,7 +51,6 @@ module fma(
     logic [3*`NF+5:0]   Am;     // addend aligned's mantissa for addition in U(NF+5.2NF+1)
     logic [3*`NF+6:0]   AmInv;   // aligned addend's mantissa possibly inverted
     logic [2*`NF+1:0]   PmKilled;      // the product's mantissa possibly killed
-    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
     ///////////////////////////////////////////////////////////////////////////////
     // Calculate the product
     //      - When multipliying two fp numbers, add the exponents
@@ -74,7 +73,7 @@ module fma(
     sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
 
     align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
-                .Ps, .As, .Am, .ZmSticky, .KillProd);
+                .Am, .ZmSticky, .KillProd);
                         
 
 
@@ -82,7 +81,7 @@ module fma(
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
     
     loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
@@ -143,7 +142,6 @@ endmodule
 
 
 module align(
-    input logic                 As, Ps,
     input logic  [`NE-1:0]      Xe, Ye, Ze,      // biased exponents in B(NE.0) format
     input logic  [`NF:0]        Zm,      // significand in U(0.NF) format]
     input logic                 XZero, YZero, ZZero, // is the input zero
@@ -224,14 +222,13 @@ module add(
     input logic                 Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
     input logic                 KillProd,      // should the product be set to 0
     input logic                 ZmSticky,
-    input logic                 XZero, YZero, // is the input zero
     output logic [3*`NF+6:0]    AmInv,  // aligned addend possibly inverted
     output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
     output logic                NegSum,        // was the sum negitive
     output logic                InvA,          // do you invert the aligned addend
-    output logic [3*`NF+5:0]    Sm,           // the positive sum
-    output logic [3*`NF+6:0]    PreSum, NegPreSum// possibly negitive sum
+    output logic [3*`NF+5:0]    Sm           // the positive sum
 );
+    logic [3*`NF+6:0]    PreSum, NegPreSum; // possibly negitive sum
 
     ///////////////////////////////////////////////////////////////////////////////
     // Addition
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index d7fcb2a0..3060e51d 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -84,7 +84,6 @@ module postprocess (
     logic S;           // S bit
     logic UfPlus1;                    // do you add one (for determining underflow flag)
     logic R;   // bits needed to determine rounding
-    logic [`FLEN:0] RoundAdd;       // how much to add to the result
     logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt;   // normalization shift count
     logic [`NORMSHIFTSZ-1:0] ShiftIn;        // is the sum zero
     logic [`NORMSHIFTSZ-1:0] Shifted;    // the shifted result
@@ -200,10 +199,10 @@ module postprocess (
     roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, 
                           .Xs, .Ys, .CvtCs, .Nsgn);
 
-    round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
-                .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
+    round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
+                .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
                 .DivSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp);
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index e2b9cb3e..38bacce0 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -48,8 +48,6 @@ module round(
     input logic                     CvtResUf,
     input logic  [`CORRSHIFTSZ-1:0] Nfrac,
     input logic                     FmaZmSticky,  // addend's sticky bit
-    input logic                     ZZero,         // is Z zero
-    input logic                     FmaInvA,          // invert Z
     input logic  [`NE+1:0]          FmaSe,         // exponent of the normalized sum
     input logic                     Nsgn,      // the result's sign
     input logic  [`NE:0]            CvtCe,    // the calculated expoent
@@ -62,11 +60,10 @@ module round(
     output logic                    S,             // sticky bit
     output logic [`NE+1:0]          Nexp,
     output logic                    Plus1,
-    output logic [`FLEN:0]          RoundAdd,           // how much to add to the result
     output logic                    R, UfLSBRes // bits needed to calculate rounding
 );
     logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
-    logic           UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
+    logic           UfCalcPlus1; 
     logic           NormSumSticky;  // normalized sum's sticky bit
     logic           UfSticky;   // sticky bit for underlow calculation
     logic [`NF-1:0] RoundFrac;
@@ -74,6 +71,7 @@ module round(
     logic           UfRound;
     logic           FpRound, FpLSBRes, FpUfRound;
     logic           CalcPlus1, FpPlus1;
+    logic [`FLEN:0] RoundAdd;           // how much to add to the result
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -288,30 +286,13 @@ module round(
         //  | NE+2 |        NF      |
         //  '-NE+2-^----NF1----^
         // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-        assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} :
-                                   {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
+        assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
 
     end else if (`FPSIZES == 3) begin
-        always_comb begin
-            case (OutFmt)
-                `FMT:  RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1};
-                `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
-                `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
-                default: RoundAdd = (`FLEN+1)'(0);
-            endcase
-        end
+        assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
 
-    end else if (`FPSIZES == 4) begin        
-        always_comb begin
-            case (OutFmt)
-                2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
-                2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
-                2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
-                2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
-            endcase
-        end
-
-    end
+    end else if (`FPSIZES == 4)      
+        assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
 
     // determine the result to be roundned
     assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];

From 7629173b152f6ab6d12c7a6070c5db09bbb61d3d Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 13 Jul 2022 22:42:39 +0000
Subject: [PATCH 20/36] DIVLEN and counter updated for sqrt computation and
 rounding

---
 pipelined/config/shared/wally-shared.vh |  2 +-
 pipelined/srt/srt-waves.do              |  2 +-
 pipelined/srt/srt.sv                    | 35 +++++++++----------------
 3 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index ad52be2e..73810c68 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -103,7 +103,7 @@
 // division constants
 `define RADIX 32'h4
 `define DIVCOPIES 32'h4
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1))
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2))
 `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do
index 340c5b1f..1e0c3f28 100644
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@@ -1,5 +1,5 @@
 add wave -noupdate /testbench/*
 add wave -noupdate /testbench/srt/*
-add wave -noupdate /testbench/srt/otfc2/*
+add wave -noupdate /testbench/srt/sotfc2/*
 add wave -noupdate /testbench/srt/preproc/*
 add wave -noupdate /testbench/srt/divcounter/*
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 4cd23488..74ce48cd 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -29,8 +29,8 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 `include "wally-config.vh"
-`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0)
-`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1))
+`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF + 2) : 2)
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 2 : (`NF - `XLEN + 2))
 
 module srt (
   input  logic clk,
@@ -49,7 +49,7 @@ module srt (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign, done,
-  output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers
+  output logic [`DIVLEN-3:0] Rem, Quot, // *** later handle integers
   output logic [`NE-1:0] rExp,
   output logic [3:0] Flags
 );
@@ -164,7 +164,7 @@ module srtpreproc (
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
   // Number of cycles of divider
-  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2);
+  assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN);
 endmodule
 
 /////////////////////////////////
@@ -226,26 +226,16 @@ endmodule
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
-module otfc2 #(parameter N=64) (
+module otfc2 #(parameter N=66) (
   input  logic         clk,
   input  logic         Start,
   input  logic         qp, qz, qn,
-  output logic [N-1:0] r
+  output logic [N-3:0] r
 );
-
   //  The on-the-fly converter transfers the quotient 
-  //  bits to the quotient as they come. 
-  //
-  //  This code follows the psuedocode presented in the 
-  //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-2 division.
-  //
-  //  QM is Q-1. It allows us to write negative bits 
-  //  without using a costly CPA. 
+  //  bits to the quotient as they come.
+  //  Use this otfc for division only.
   logic [N+2:0] Q, QM, QNext, QMNext, QMMux;
-  //  QR and QMR are the shifted versions of Q and QM.
-  //  They are treated as [N-1:r] size signals, and 
-  //  discard the r most significant bits of Q and QM. 
   logic [N+1:0] QR, QMR;
 
   flopr #(N+3) Qreg(clk, Start, QNext, Q);
@@ -266,7 +256,7 @@ module otfc2 #(parameter N=64) (
       QMNext = {QMR, 1'b0};
     end 
   end
-  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+  assign r = Q[N] ? Q[N-1:2] : Q[N-2:1];
 
 endmodule
 
@@ -278,13 +268,12 @@ module sotfc2(
   input  logic         Start,
   input  logic         sp, sn,
   input  logic [`DIVLEN+3:0] C,
-  output logic [`DIVLEN-1:0] Sq,
+  output logic [`DIVLEN-3:0] Sq,
   output logic [`DIVLEN+3:0] F
 );
-
-
   //  The on-the-fly converter transfers the square root 
   //  bits to the quotient as they come.
+  //  Use this otfc for division and square root.
   logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM);
@@ -303,7 +292,7 @@ module sotfc2(
       SMNext = SM | ((C << 2) & ~(C << 1));
     end 
   end
-  assign Sq = S[`DIVLEN-1:0];
+  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1];
 
   fsel2 fsel(sp, sn, C, S, SM, F);
 

From e5a8ac2a442c1d2afea46a56ddf9eea8b9d43029 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 13 Jul 2022 23:44:54 +0000
Subject: [PATCH 21/36] renamed a file to fit diagram

---
 pipelined/src/fpu/postprocess.sv                           | 2 +-
 pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} (99%)

diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 3060e51d..bc9c46a2 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -181,7 +181,7 @@ module postprocess (
     
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
-    lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
+    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
                                 .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
                                 .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
 
diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
similarity index 99%
rename from pipelined/src/fpu/lzacorrection.sv
rename to pipelined/src/fpu/shiftcorrection.sv
index eb9d3559..f12cb831 100644
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -28,7 +28,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 
-module lzacorrection(
+module shiftcorrection(
     input logic  [`NORMSHIFTSZ-1:0] Shifted,         // the shifted sum before LZA correction
     input logic                     FmaOp,
     input logic                     DivOp,

From 8506d2be4cb66eec64e177bcb7ca98ac485a3cbd Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 14 Jul 2022 00:01:07 +0000
Subject: [PATCH 22/36] fixed uncommented line in makefile

---
 tests/riscof/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index 621a5b54..af67a535 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test
 current_dir = $(shell pwd)
 XLEN    ?= 64
 
-all: root build_arch build_wally memfile
+all: root build_arch #build_wally memfile
 
 root:
 	mkdir -p $(work_dir)

From f49c2a969f3b7537a3fe189f2ab3f1941ae05987 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 14 Jul 2022 00:39:30 +0000
Subject: [PATCH 23/36] S and SM are updating but are not correct yet

---
 pipelined/srt/srt.sv       | 12 ++++++------
 pipelined/srt/testbench.sv |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 74ce48cd..b87fabfe 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -215,7 +215,7 @@ module fsel2 (
   // Generate for both positive and negative bits
   assign FP = ~S & C;
   assign FN = SM | (C & (~C << 2));
-  assign FZ = {(`DIVLEN+4){1'B0}};
+  assign FZ = {(`DIVLEN+4){1'b0}};
 
   // Choose which adder input will be used
 
@@ -276,20 +276,20 @@ module sotfc2(
   //  Use this otfc for division and square root.
   logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
 
-  flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM);
+  flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
   mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
-  flop #(`DIVLEN+4) SMreg(clk, SMux, S);
+  flop #(`DIVLEN+4) Sreg(clk, SMux, S);
 
   always_comb begin
     if (sp) begin
-      SNext  = S | ((C << 2) & ~(C << 1));
+      SNext  = S | ((C << 1) & ~(C << 2));
       SMNext = S;
     end else if (sn) begin
-      SNext  = SM | ((C << 2) & ~(C << 1));
+      SNext  = SM | ((C << 1) & ~(C << 2));
       SMNext = SM;
     end else begin        // If sp and sn are not true, then sz is
       SNext  = S;
-      SMNext = SM | ((C << 2) & ~(C << 1));
+      SMNext = SM | ((C << 1) & ~(C << 2));
     end 
   end
   assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1];
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index b83e6b00..02cd0bca 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -72,7 +72,7 @@ module testbench;
 
   // Equip Int test or Sqrt test
   assign Int = 1'b0;
-  assign Sqrt = 1'b0;
+  assign Sqrt = 1'b1;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -155,7 +155,7 @@ module testbench;
         req <= #5 1;
         diffp = correctr[51:0] - r;
         diffn = r - correctr[51:0];
-        if (rExp !== correctr[62:52]) // check if accurate to 1 ulp
+        if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
           begin
             errors = errors + 1;
             $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);

From b069cfbec2201566fbb02bd0c532362e05e12101 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 14 Jul 2022 18:16:00 +0000
Subject: [PATCH 24/36] fixed error in divsqrt

---
 pipelined/config/shared/wally-shared.vh  |  8 ++++----
 pipelined/regression/sim-testfloat       |  4 ++--
 pipelined/regression/sim-testfloat-batch |  2 ++
 pipelined/regression/wave-fpu.do         | 18 +++++++++---------
 pipelined/src/fpu/divshiftcalc.sv        |  2 +-
 pipelined/src/fpu/divsqrt.sv             |  4 ++--
 pipelined/testbench/testbench-fp.sv      |  9 ++++-----
 7 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 73810c68..5dc008bb 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -97,14 +97,14 @@
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9))
-`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
+`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9))
+`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6))
 
 // division constants
 `define RADIX 32'h4
 `define DIVCOPIES 32'h4
-`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2))
-`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
+`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
+`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
 `define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
diff --git a/pipelined/regression/sim-testfloat b/pipelined/regression/sim-testfloat
index 18f174a9..25fe09a1 100755
--- a/pipelined/regression/sim-testfloat
+++ b/pipelined/regression/sim-testfloat
@@ -6,7 +6,7 @@
 # fma    - test fma
 # sub    - test subtraction
 # div    - test division
-# sqrt   - test square ro
+# sqrt   - test square root
 # all    - test everything
 
-vsim -do "do testfloat.do rv64fp mul"
+vsim -do "do testfloat.do rv64fp $1"
diff --git a/pipelined/regression/sim-testfloat-batch b/pipelined/regression/sim-testfloat-batch
index f1178f1d..c7f28a55 100755
--- a/pipelined/regression/sim-testfloat-batch
+++ b/pipelined/regression/sim-testfloat-batch
@@ -1,7 +1,9 @@
+
 # cvtint - test integer conversion unit (fcvtint)
 # cvtfp  - test floating-point conversion unit (fcvtfp)
 # cmp    - test comparison unit's LT, LE, EQ opperations (fcmp)
 # add    - test addition
+# fma    - test fma
 # sub    - test subtraction
 # div    - test division
 # sqrt   - test square root
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 9a3d7e06..9caf75de 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -9,23 +9,23 @@ add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/DivBusy
-add wave -noupdate /testbenchfp/srtfsm/state
+add wave -noupdate /testbenchfp/divsqrt/srtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index a4f3feff..3d31d863 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -35,7 +35,7 @@ module divshiftcalc(
     // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
     assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)};
 
-    // *** may be able to reduce shifter size
+    // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion
     assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 7e240420..91e07b08 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -59,10 +59,10 @@ module divsqrt(
   logic [`DURLEN-1:0] Dur;
   logic NegSticky;
 
-  srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-                .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
+               .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
   srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 033045e7..1493903e 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -695,11 +695,10 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
-  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
-                .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
+  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), 
+                  .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), 
+                  .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp),
+                  .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone);
 
   assign CmpFlg[3:0] = 0;
 

From 38bbd19abfaa7cda7393c8dc6d3bc727fc2872b8 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 14 Jul 2022 19:38:27 +0000
Subject: [PATCH 25/36] Six tests passing and a bunch of sizing issues fixed

---
 pipelined/config/shared/wally-shared.vh |  2 ++
 pipelined/srt/sqrttestgen.c             | 15 +++++++++---
 pipelined/srt/srt.sv                    |  8 +++----
 pipelined/srt/testbench.sv              | 32 ++++++++++++-------------
 4 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 5dc008bb..1237ef18 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -104,6 +104,8 @@
 `define RADIX 32'h4
 `define DIVCOPIES 32'h4
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
+`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
+`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
 `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index 710fc32f..07f34c3c 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -30,11 +30,11 @@ void main(void)
   FILE *fptr;
   double aFrac, rFrac;
   int    aExp,  rExp;
-  double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625,
+  double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
 			  1.1, 1.2, 1.01, 1.001, 1.0001,
 			  2/1.1, 2/1.5, 2/1.25, 2/1.125};
-  double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10,
         11, 12, 13, 14, 15, 16};
   int i;
   int bias = 1023;
@@ -47,10 +47,19 @@ void main(void)
   for (i=0; i<ENTRIES; i++) {
     aFrac = mans[i];
     aExp  = exps[i] + bias;
-    rFrac = sqrt(aFrac * pow(2, aExp - bias));
+    rFrac = sqrt(aFrac * pow(2, exps[i]));
     rExp  = (int) (log(rFrac)/log(2) + bias);
     output(fptr, aExp, aFrac, rExp, rFrac);
   }
+
+  //                                  WS
+  // Test 1: sqrt(1) = 1              0000 0000 0000 00
+  // Test 2: sqrt(1849/1024) = 43/32  0000 1100 1110 01
+  // Test 3: sqrt(5)                  0000 0100 0000 00
+  // Test 4: sqrt(9) = 3              1111 1001 0000 00
+  // Test 5: sqrt(17)                 0000 0001 0000 00
+  // Test 6: sqrt(56)                 1111 1110 0000 00
+  // Test 7: sqrt(120)                0000 1110 0000 00
   
   // for (i = 0; i< RANDOM_VECS; i++) {
   //   a = random_input();
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index b87fabfe..8e143efb 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -29,8 +29,6 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 `include "wally-config.vh"
-`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF + 2) : 2)
-`define EXTRAINTBITS ((`NF<(`XLEN)) ? 2 : (`NF - `XLEN + 2))
 
 module srt (
   input  logic clk,
@@ -49,7 +47,7 @@ module srt (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign, done,
-  output logic [`DIVLEN-3:0] Rem, Quot, // *** later handle integers
+  output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers
   output logic [`NE-1:0] rExp,
   output logic [3:0] Flags
 );
@@ -268,7 +266,7 @@ module sotfc2(
   input  logic         Start,
   input  logic         sp, sn,
   input  logic [`DIVLEN+3:0] C,
-  output logic [`DIVLEN-3:0] Sq,
+  output logic [`DIVLEN-2:0] Sq,
   output logic [`DIVLEN+3:0] F
 );
   //  The on-the-fly converter transfers the square root 
@@ -292,7 +290,7 @@ module sotfc2(
       SMNext = SM | ((C << 1) & ~(C << 2));
     end 
   end
-  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1];
+  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
 
   fsel2 fsel(sp, sn, C, S, SM, F);
 
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 02cd0bca..bbb6dee2 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -1,4 +1,4 @@
-`define DIVLEN 64
+`include "wally-config.vh"
 
 /////////////
 // counter //
@@ -39,17 +39,17 @@ endmodule
 // testbench //
 //////////
 module testbench;
-  logic              clk;
-  logic              req;
-  logic              done;
-  logic              Int;
-  logic [63:0]       a, b;
-  logic [51:0]       afrac, bfrac;
-  logic [10:0]       aExp, bExp;
-  logic              asign, bsign;
-  logic [51:0]       r;
-  logic [63:0]       rInt;
-  logic [`DIVLEN-1:0]  Quot;
+  logic               clk;
+  logic               req;
+  logic               done;
+  logic               Int;
+  logic [`XLEN-1:0]   a, b;
+  logic [`NF-1:0]     afrac, bfrac;
+  logic [`NE-1:0]     aExp, bExp;
+  logic               asign, bsign;
+  logic [`NF-1:0]     r;
+  logic [`XLEN-1:0]   rInt;
+  logic [`DIVLEN-2:0] Quot;
  
   // Test parameters
   parameter MEM_SIZE = 40000;
@@ -108,16 +108,16 @@ module testbench;
       b = Vec[`memb];
       {bsign, bExp, bfrac} = b;
       nextr = Vec[`memr];
-      r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
-      rInt = Quot;
+      r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
+      rInt = {1'b1, Quot};
       req <= #5 1;
     end
   
   // Apply directed test vectors read from file.
 
   always @(posedge clk) begin
-    r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)];
-    rInt = Quot;
+    r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
+    rInt = {1'b1, Quot};
     if (done) begin
       if (~Int & ~Sqrt) begin
         req <= #5 1;

From 9f18f6a203526723bd9f59a8519eb13520f328df Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 14 Jul 2022 21:19:45 +0000
Subject: [PATCH 26/36] Square root

---
 pipelined/srt/sqrttestgen.c |  4 ++--
 pipelined/srt/srt.sv        | 10 +++++-----
 pipelined/srt/testbench.sv  |  3 +--
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index 07f34c3c..76c6a664 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -32,9 +32,9 @@ void main(void)
   int    aExp,  rExp;
   double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625,
 			  1.75, 1.875, 1.99999,
-			  1.1, 1.2, 1.01, 1.001, 1.0001,
+			  1.1, 1.5, 1.01, 1.001, 1.0001,
 			  2/1.1, 2/1.5, 2/1.25, 2/1.125};
-  double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
         11, 12, 13, 14, 15, 16};
   int i;
   int bias = 1023;
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 8e143efb..5dcf7e96 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -75,7 +75,7 @@ module srt (
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn);
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn);
 
   flopen #(`NE) expflop(clk, Start, calcExp, rExp);
   flopen #(1) signflop(clk, Start, calcSign, rsign);
@@ -169,11 +169,11 @@ endmodule
 // Quotient Selection, Radix 2 //
 /////////////////////////////////
 module qsel2 ( // *** eventually just change to 4 bits
-  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
+  input  logic [`DIVLEN+3:`DIVLEN-1] ps, pc, 
   output logic         qp, qz, qn
 );
  
-  logic [`DIVLEN+3:`DIVLEN]  p, g;
+  logic [`DIVLEN+3:`DIVLEN-1]  p, g;
   logic          magnitude, sign, cout;
 
   // The quotient selection logic is presented for simplicity, not
@@ -184,8 +184,8 @@ module qsel2 ( // *** eventually just change to 4 bits
   assign p = ps ^ pc;
   assign g = ps & pc;
 
-  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
-  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
+  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
+  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1]))));
   assign #1 sign = p[`DIVLEN+3] ^ cout;
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index bbb6dee2..39696af4 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -160,10 +160,9 @@ module testbench;
             errors = errors + 1;
             $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp);
             $display("failed\n");
-            $stop;
           end
         if (afrac === 52'hxxxxxxxxxxxxx) begin 
-          $display("%d Tests completed successfully", testnum);
+          $display("%d Tests completed successfully", testnum-errors);
           $stop; end 
       end
     end

From ec9536f983e5229b5e2d48ce5df4de9a46903a58 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 14 Jul 2022 22:52:09 +0000
Subject: [PATCH 27/36] Square root radix 2 working, does not work with
 division

---
 pipelined/srt/srt.sv | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 5dcf7e96..949335bf 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -75,7 +75,7 @@ module srt (
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn);
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn);
 
   flopen #(`NE) expflop(clk, Start, calcExp, rExp);
   flopen #(1) signflop(clk, Start, calcSign, rsign);
@@ -170,6 +170,7 @@ endmodule
 /////////////////////////////////
 module qsel2 ( // *** eventually just change to 4 bits
   input  logic [`DIVLEN+3:`DIVLEN-1] ps, pc, 
+  input  logic         Sqrt,
   output logic         qp, qz, qn
 );
  
@@ -185,7 +186,7 @@ module qsel2 ( // *** eventually just change to 4 bits
   assign g = ps & pc;
 
   assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
-  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1]))));
+  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1])))));
   assign #1 sign = p[`DIVLEN+3] ^ cout;
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));

From e2510222696dbb3c43b26bcc42365218861db961 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 15 Jul 2022 20:16:59 +0000
Subject: [PATCH 28/36] merged floating-point radix-2 divider with radix-4

---
 pipelined/config/shared/wally-shared.vh |  11 +-
 pipelined/regression/sim-wally          |   2 +-
 pipelined/regression/wave-fpu.do        |  18 +-
 pipelined/src/fpu/divshiftcalc.sv       |   8 +-
 pipelined/src/fpu/divsqrt.sv            |  11 +-
 pipelined/src/fpu/fpu.sv                |   2 +-
 pipelined/src/fpu/postprocess.sv        |   2 +-
 pipelined/src/fpu/shiftcorrection.sv    |   6 +-
 pipelined/src/fpu/srt-radix4.sv         | 359 ------------------------
 pipelined/src/fpu/srtfsm.sv             |  14 +-
 pipelined/src/fpu/srtpreproc.sv         |  10 +-
 pipelined/testbench/testbench-fp.sv     |   2 +-
 12 files changed, 52 insertions(+), 393 deletions(-)
 delete mode 100644 pipelined/src/fpu/srt-radix4.sv

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 1237ef18..015ef261 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -97,19 +97,20 @@
 `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF))
 `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN))
 `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9))
-`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6))
+`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
 
 // division constants
-`define RADIX 32'h4
-`define DIVCOPIES 32'h4
+`define RADIX 32'h2
+`define DIVCOPIES 32'h1
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
 `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
 `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3))
 `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN)
 `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
-`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
+// one interation is required for the integer bit for minimally redundent radix-4
+`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
 `define DURLEN ($clog2(`FPDUR+1))
 `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 
diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally
index 06985148..51c8b3ed 100755
--- a/pipelined/regression/sim-wally
+++ b/pipelined/regression/sim-wally
@@ -1,2 +1,2 @@
-vsim -do "do wally-pipelined.do rv32gc arch32i"
+vsim -do "do wally-pipelined.do rv64gc arch64d"
 
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 9caf75de..98c72f17 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -20,12 +20,20 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
-add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/*
-add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/*
+add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/*
+# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
+# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
-add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index 3d31d863..af321b25 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -1,7 +1,7 @@
 `include "wally-config.vh"
 
 module divshiftcalc(
-    input logic  [`QLEN-1:0] Quot,
+    input logic  [`QLEN-1-(`RADIX/4):0] Quot,
     input logic  [`FMTBITS-1:0] Fmt,
     input logic [`DURLEN-1:0] DivEarlyTermShift,
     input logic [`NE+1:0] DivCalcExp,
@@ -30,12 +30,12 @@ module divshiftcalc(
     //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
     //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
     // inital Left shift amount  = NF
+    // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
     assign NormShift = (`NE+2)'(`NF);
     // if the shift amount is negitive then dont shift (keep sticky bit)
     // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
-    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)};
+    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
 
-    // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion
-    assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index 91e07b08..cbf7f95f 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -47,22 +47,23 @@ module divsqrt(
   output logic DivDone,
   output logic [`NE+1:0] DivCalcExpM,
   output logic [`DURLEN-1:0] EarlyTermShiftM,
-  output logic [`QLEN-1:0] QuotM
+  output logic [`QLEN-1-(`RADIX/4):0] QuotM
 //   output logic [`XLEN-1:0] RemM,
 );
 
   logic [`DIVLEN+3:0]  NextWSN, NextWCN;
   logic [`DIVLEN+3:0]  WS, WC;
+  logic [`DIVLEN+3:0] StickyWSA;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
   logic [`DIVLEN-1:0] Dpreproc;
   logic [`DURLEN-1:0] Dur;
   logic NegSticky;
 
-  srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
-               .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
-                .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
+               .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
+  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
+                .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 5428481d..1bbd0aea 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -125,7 +125,7 @@ module fpu (
    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`QLEN-1:0] QuotM;
+   logic [`QLEN-1-(`RADIX/4):0] QuotM;
    logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
    logic DivStickyE, DivStickyM;
    logic DivDoneM;
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index bc9c46a2..e0eb50ac 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -58,7 +58,7 @@ module postprocess (
     input logic                             DivSticky,
     input logic                             DivDone,
     input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`QLEN-1:0]                Quot,
+    input logic  [`QLEN-1-(`RADIX/4):0]                Quot,
     // conversion signals
     input logic                             CvtCs,     // the result's sign
     input logic  [`NE:0]                    CvtCe,    // the calculated expoent
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index f12cb831..ecfd9ba0 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -43,7 +43,7 @@ module shiftcorrection(
     output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
-    logic [`CORRSHIFTSZ:0] CorrQuotShifted;
+    logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
     logic                  ResDenorm;    // is the result denormalized
     logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
@@ -53,9 +53,9 @@ module shiftcorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted =  {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0};
+    assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
     assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
deleted file mode 100644
index b1bf6f56..00000000
--- a/pipelined/src/fpu/srt-radix4.sv
+++ /dev/null
@@ -1,359 +0,0 @@
-///////////////////////////////////////////
-// srt.sv
-//
-// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
-// Modified:13 January 2022
-//
-// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// MIT LICENSE
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
-// software and associated documentation files (the "Software"), to deal in the Software 
-// without restriction, including without limitation the rights to use, copy, modify, merge, 
-// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
-// to whom the Software is furnished to do so, subject to the following conditions:
-//
-//   The above copyright notice and this permission notice shall be included in all copies or 
-//   substantial portions of the Software.
-//
-//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
-//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
-//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
-//   OR OTHER DEALINGS IN THE SOFTWARE.
-////////////////////////////////////////////////////////////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module srtradix4(
-  input  logic clk,
-  input  logic DivStart, 
-  input  logic DivBusy, 
-  input logic  [`FMTBITS-1:0] FmtE,
-  input  logic [`NE-1:0] XExpE, YExpE,
-  input  logic XZeroE, YZeroE, 
-  input logic [`DIVLEN-1:0] X,
-  input logic [`DIVLEN-1:0] Dpreproc,
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input logic NegSticky,
-  output logic [`QLEN-1:0] Quot,
-  output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
-  output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic  [`NE+1:0] DivCalcExpM,
-  output logic [`XLEN-1:0] Rem
-);
-
-
- /* verilator lint_off UNOPTFLAT */
-  logic [`DIVLEN+3:0]  WSA[`DIVCOPIES-1:0];
-  logic [`DIVLEN+3:0]  WCA[`DIVCOPIES-1:0];
-  logic [`DIVLEN+3:0]  WS[`DIVCOPIES-1:0];
-  logic [`DIVLEN+3:0]  WC[`DIVCOPIES-1:0];
-  logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
-  logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
-  logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
-  logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
- /* verilator lint_on UNOPTFLAT */
-  logic [`DIVLEN+3:0]  WSN, WCN;
-  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
-  logic [`NE+1:0] DivCalcExp;
-  logic [$clog2(`XLEN+1)-1:0] intExp;
-  logic           intSign;
-  logic [`QLEN-1:0] QMMux;
-
-  // Top Muxes and Registers
-  // When start is asserted, the inputs are loaded into the divider.
-  // Otherwise, the divisor is retained and the partial remainder
-  // is fed back for the next iteration.
-  //  - when the start signal is asserted X and 0 are loaded into WS and WC
-  //  - otherwise load WSA into the flipflop
-  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
-  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
-  assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
-  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
-  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
-  mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
-  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
-  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
-  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
-
-
-  // Divisor Selections
-  // - choose the negitive version of what's being selected
-  assign DBar = ~D;
-  assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
-  assign D2 = {D[`DIVLEN+2:0], 1'b0};
-
-  genvar i;
-  generate
-    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
-      divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, 
-      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
-      if(i<(`DIVCOPIES-1)) begin 
-        assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
-        assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
-        assign Q[i+1] = QNext[i];
-        assign QM[i+1] = QMNext[i];
-      end
-    end
-  endgenerate
-
-  // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
-  flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
-  flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
-
-  assign Quot = NegSticky ? QM[0] : Q[0];
-  assign FirstWS = WS[0];
-  assign FirstWC = WC[0];
-
-  expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
-
-endmodule
-
-////////////////
-// Submodules //
-////////////////
-
- /* verilator lint_off UNOPTFLAT */
-module divinteration (
-  input logic clk,
-  input logic DivStart,
-  input logic DivBusy,
-  input logic [`DIVLEN+3:0] D,
-  input logic [`DIVLEN+3:0]  DBar, D2, DBar2,
-  input logic [`QLEN-1:0] Q, QM,
-  input logic [`DIVLEN+3:0]  WS, WC,
-  output logic [`QLEN-1:0] QNext, QMNext, 
-  output logic [`DIVLEN+3:0]  WSA, WCA
-);
- /* verilator lint_on UNOPTFLAT */
-
-  logic [`DIVLEN+3:0]  Dsel;
-  logic [3:0]     q;
-
-  // Quotient Selection logic
-  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  // q encoding:
-	// 1000 = +2
-	// 0100 = +1
-	// 0000 =  0
-	// 0010 = -1
-	// 0001 = -2
-  qsel4 qsel4(.D, .WS, .WC, .q);
-
-  always_comb
-    case (q)
-      4'b1000: Dsel = DBar2;
-      4'b0100: Dsel = DBar;
-      4'b0000: Dsel = {`DIVLEN+4{1'b0}};
-      4'b0010: Dsel = D;
-      4'b0001: Dsel = D2;
-      default: Dsel = {`DIVLEN+4{1'bx}};
-    endcase
-
-  // Partial Product Generation
-  //  WSA, WCA = WS + WC - qD
-  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
-
-  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext);
-
-endmodule
-
-module qsel4 (
-	input logic [`DIVLEN+3:0] D,
-	input logic [`DIVLEN+3:0] WS, WC,
-	output logic [3:0] q
-);
-	logic [6:0] Wmsbs;
-	logic [7:0] PreWmsbs;
-	logic [2:0] Dmsbs;
-	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
-	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
-	// D = 0001.xxx...
-	// Dmsbs = |   |
-  // W =      xxxx.xxx...
-	// Wmsbs = |        |
-
-	logic [3:0] QSel4[1023:0];
-
-  always_comb begin 
-    integer d, w, i, w2;
-    for(d=0; d<8; d++)
-      for(w=0; w<128; w++)begin
-        i = d*128+w;
-        w2 = w-128*(w>=64); // convert to two's complement
-        case(d)
-          0: if($signed(w2)>=$signed(12))      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-4)  QSel4[i] = 4'b0000; 
-            else if(w2>=-13) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          1: if(w2>=14)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-15) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          2: if(w2>=15)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-16) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          3: if(w2>=16)      QSel4[i] = 4'b1000;
-            else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-18) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          4: if(w2>=18)      QSel4[i] = 4'b1000;
-            else if(w2>=6)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          5: if(w2>=20)      QSel4[i] = 4'b1000;
-            else if(w2>=6)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          6: if(w2>=20)      QSel4[i] = 4'b1000;
-            else if(w2>=8)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-22) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          7: if(w2>=24)      QSel4[i] = 4'b1000;
-            else if(w2>=8)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-24) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-        endcase
-      end
-  end
-	assign q = QSel4[{Dmsbs,Wmsbs}];
-	
-endmodule
-
-///////////////////////////////////
-// On-The-Fly Converter, Radix 2 //
-///////////////////////////////////
-module otfc4 (
-  input  logic         clk,
-  input  logic         DivStart,
-  input  logic         DivBusy,
-  input  logic [3:0]   q,
-  input logic [`QLEN-1:0] Q, QM,
-  output logic [`QLEN-1:0] QNext, QMNext
-);
-
-  //  The on-the-fly converter transfers the quotient 
-  //  bits to the quotient as they come. 
-  //
-  //  This code follows the psuedocode presented in the 
-  //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-4 division.
-  //
-  //  QM is Q-1. It allows us to write negative bits 
-  //  without using a costly CPA. 
-
-  //  QR and QMR are the shifted versions of Q and QM.
-  //  They are treated as [N-1:r] size signals, and 
-  //  discard the r most significant bits of Q and QM. 
-  logic [`QLEN-3:0] QR, QMR;
-
-  // shift Q (quotent) and QM (quotent-1)
-		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
-		// else if 	q = 1   Q = {Q, 01} 	QM = {Q, 00}	
-		// else if 	q = 0   Q = {Q, 00} 	QM = {QM, 11}	
-		// else if 	q = -1	Q = {QM, 11} 	QM = {QM, 10}
-		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
-    // *** how does the 0 concatination numbers work?
-
-  assign QR  = Q[`QLEN-3:0];
-  assign QMR = QM[`QLEN-3:0];     // Shifted Q and QM
-  always_comb begin
-    if (q[3]) begin // +2
-      QNext  = {QR,  2'b10};
-      QMNext = {QR,  2'b01};
-    end else if (q[2]) begin // +1
-      QNext  = {QR,  2'b01};
-      QMNext = {QR,  2'b00};
-    end else if (q[1]) begin // -1
-      QNext  = {QMR,  2'b11};
-      QMNext = {QMR,  2'b10};
-    end else if (q[0]) begin // -2
-      QNext  = {QMR,  2'b10};
-      QMNext = {QMR,  2'b01};
-    end else begin           // 0
-      QNext  = {QR,  2'b00};
-      QMNext = {QMR, 2'b11};
-    end 
-  end
-  // Final Quoteint is in the range [.5, 2)
-
-endmodule
-
-
-
-/////////
-// csa //
-/////////
-module csa #(parameter N=69) (
-  input  logic [N-1:0] in1, in2, in3, 
-  input  logic         cin, 
-  output logic [N-1:0] out1, out2
-);
-
-  // This block adds in1, in2, in3, and cin to produce 
-  // a result out1 / out2 in carry-save redundant form.
-  // cin is just added to the least significant bit and
-  // is Startuired to handle adding a negative divisor.
-  // Fortunately, the carry (out2) is shifted left by one
-  // bit, leaving room in the least significant bit to 
-  // insert cin.
-
-  assign out1 = in1 ^ in2 ^ in3;
-  assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
-		    (in2[N-2:0] & in3[N-2:0]), cin};
-endmodule
-
-module expcalc(
-  input logic  [`FMTBITS-1:0] FmtE,
-  input  logic [`NE-1:0] XExpE, YExpE,
-  input logic XZeroE, 
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic  [`NE+1:0] DivCalcExp
-  );
-    logic [`NE-2:0] Bias;
-    
-    if (`FPSIZES == 1) begin
-        assign Bias = (`NE-1)'(`BIAS); 
-
-    end else if (`FPSIZES == 2) begin
-        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (FmtE)
-                `FMT: Bias  =  (`NE-1)'(`BIAS);
-                `FMT1: Bias = (`NE-1)'(`BIAS1);
-                `FMT2: Bias = (`NE-1)'(`BIAS2);
-                default: Bias = 'x;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (FmtE)
-                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
-                2'h1: Bias =  (`NE-1)'(`D_BIAS);
-                2'h0: Bias =  (`NE-1)'(`S_BIAS);
-                2'h2: Bias =  (`NE-1)'(`H_BIAS);
-            endcase
-    end
-    // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
-    endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 481b1b22..634ecc1d 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -38,8 +38,9 @@ module srtfsm(
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
   input  logic DivStart, 
-  input logic StallE,
-  input logic StallM,
+  input  logic StallE,
+  input  logic StallM,
+  input  logic [`DIVLEN+3:0] StickyWSA,
   input  logic [`DURLEN-1:0] Dur,
   output logic [`DURLEN-1:0] EarlyTermShiftE,
   output logic DivStickyE,
@@ -59,7 +60,14 @@ module srtfsm(
   //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
   assign DivBusy = (state == BUSY);
   assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
-  assign DivStickyE = |W;
+  // calculate sticky bit
+  //    - there is a chance that a value is subtracted infinitely, resulting in an exact QM result
+  //      this is only a problem on radix 2 (and possibly maximally redundant 4) since minimally redundant
+  //      radix-4 division can't create a QM that continually adds 0's
+  if (`RADIX == 2)
+    assign DivStickyE = |W&~(StickyWSA == WS);
+  else
+    assign DivStickyE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index 7386332f..b9fb8bb8 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -31,7 +31,7 @@
 `include "wally-config.vh"
 
 module srtpreproc (
-  input  logic [`NF:0] XManE, YManE,
+  input  logic [`NF:0] Xm, Ym,
   output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
@@ -49,16 +49,16 @@ module srtpreproc (
 
   // ***can probably merge X LZC with conversion
   // cout the number of leading zeros
-  lzc #(`NF+1) lzcA (XManE, XZeroCnt);
-  lzc #(`NF+1) lzcB (YManE, YZeroCnt);
+  lzc #(`NF+1) lzcA (Xm, XZeroCnt);
+  lzc #(`NF+1) lzcB (Ym, YZeroCnt);
 
   // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
   // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
 
   
   assign X = PreprocX;
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 1493903e..a95a6624 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -80,7 +80,7 @@ module testbenchfp;
   logic CvtResSgnE;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`QLEN-1:0] Quot;
+	logic [`QLEN-1-(`RADIX/4):0] Quot;
   logic CvtResDenormUfE;
   logic [`DURLEN-1:0] EarlyTermShift;
   logic DivStart, DivBusy;

From a4cd157f0086d0ae42782a937698f1ff7fa8c894 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Fri, 15 Jul 2022 21:42:45 +0000
Subject: [PATCH 29/36] forgot some files

---
 pipelined/src/fpu/otfc.sv | 112 +++++++++++++++++
 pipelined/src/fpu/qsel.sv | 135 ++++++++++++++++++++
 pipelined/src/fpu/srt.sv  | 259 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 506 insertions(+)
 create mode 100644 pipelined/src/fpu/otfc.sv
 create mode 100644 pipelined/src/fpu/qsel.sv
 create mode 100644 pipelined/src/fpu/srt.sv

diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
new file mode 100644
index 00000000..8d11273a
--- /dev/null
+++ b/pipelined/src/fpu/otfc.sv
@@ -0,0 +1,112 @@
+///////////////////////////////////////////
+// otfc.sv
+//
+// Written: me@KatherineParry.com, cturek@hmc.edu 
+// Modified:7/14/2022
+//
+// Purpose: On the fly conversion
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module otfc2 (
+  input  logic         qp, qz,            // selected digit: qp -> +1, qz -> 0, neither -> -1
+  input  logic [`QLEN-1:0] Q, QM,         // current quotient and its companion QM
+  output logic [`QLEN-1:0] QNext, QMNext  // Q and QM after shifting in the new digit
+);
+  //  Radix-2 on-the-fly converter: shifts one new quotient bit into
+  //  Q and QM per step, choosing Q or QM as the prefix by digit sign.
+  //  Use this otfc for division only.
+  logic [`QLEN-2:0] QR, QMR;
+
+  assign QR  = Q[`QLEN-2:0];      // Q without its MSB, ready to take one new LSB
+  assign QMR = QM[`QLEN-2:0];     // QM without its MSB, ready to take one new LSB
+
+  always_comb begin
+    if (qp) begin            // digit +1: both results are built on Q
+      QNext  = {QR,  1'b1};
+      QMNext = {QR,  1'b0};
+    end else if (qz) begin   // digit 0: Q is built on Q, QM on QM
+      QNext  = {QR,  1'b0};
+      QMNext = {QMR, 1'b1};
+    end else begin        // neither qp nor qz is set, so the digit is -1: both are built on QM
+      QNext  = {QMR, 1'b1};
+      QMNext = {QMR, 1'b0};
+    end 
+  end
+
+endmodule
+
+
+module otfc4 (
+  input  logic [3:0]   q,                 // digit select: q[3]=+2, q[2]=+1, q[1]=-1, q[0]=-2, all zero = 0
+  input  logic [`QLEN-1:0] Q, QM,         // current quotient and quotient-1
+  output logic [`QLEN-1:0] QNext, QMNext  // Q and QM after shifting in the new two-bit digit
+);
+
+  //  The on-the-fly converter transfers the quotient 
+  //  bits to the quotient as they come. 
+  //
+  //  This code follows the pseudocode presented in the 
+  //  floating point chapter of the book. Right now, 
+  //  it is written for Radix-4 division.
+  //
+  //  QM is Q-1. It allows us to write negative bits 
+  //  without using a costly carry-propagate adder (CPA). 
+
+  //  QR and QMR are the shifted versions of Q and QM.
+  //  They are treated as [N-1:r] size signals, and 
+  //  discard the r most significant bits of Q and QM. 
+  logic [`QLEN-3:0] QR, QMR;
+
+  // shift Q (quotient) and QM (quotient-1)
+		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
+		// else if 	q = 1   Q = {Q, 01} 	QM = {Q, 00}	
+		// else if 	q = 0   Q = {Q, 00} 	QM = {QM, 11}	
+		// else if 	q = -1	Q = {QM, 11} 	QM = {QM, 10}
+		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
+
+  assign QR  = Q[`QLEN-3:0];      // Q without its top two bits
+  assign QMR = QM[`QLEN-3:0];     // QM without its top two bits
+  always_comb begin               // priority chain: the highest set bit of q wins
+    if (q[3]) begin // +2
+      QNext  = {QR,  2'b10};
+      QMNext = {QR,  2'b01};
+    end else if (q[2]) begin // +1
+      QNext  = {QR,  2'b01};
+      QMNext = {QR,  2'b00};
+    end else if (q[1]) begin // -1
+      QNext  = {QMR,  2'b11};
+      QMNext = {QMR,  2'b10};
+    end else if (q[0]) begin // -2
+      QNext  = {QMR,  2'b10};
+      QMNext = {QMR,  2'b01};
+    end else begin           // 0
+      QNext  = {QR,  2'b00};
+      QMNext = {QMR, 2'b11};
+    end 
+  end
+  // Final Quotient is in the range [.5, 2)
+
+endmodule
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
new file mode 100644
index 00000000..396ca776
--- /dev/null
+++ b/pipelined/src/fpu/qsel.sv
@@ -0,0 +1,135 @@
+///////////////////////////////////////////
+// qsel.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu 
+// Modified:13 January 2022
+//
+// Purpose: Quotient digit selection for radix-2 and radix-4 SRT division
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module qsel2 ( // *** eventually just change to 4 bits
+  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, // top 4 bits of two words that are added below; presumably the sum/carry halves of the partial remainder - confirm at the instantiation
+  output logic         qp, qz//, qn
+);
+ 
+  logic [`DIVLEN+3:`DIVLEN]  p, g;          // per-bit propagate and generate of ps + pc
+  logic          magnitude, sign, cout;
+
+  // The quotient selection logic is presented for simplicity, not
+  // for efficiency.  You can probably optimize your logic to
+  // select the proper divisor with less delay.
+
+  // Quotient equations from EE371 lecture notes 13-20
+  assign p = ps ^ pc;
+  assign g = ps & pc;
+
+  assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); // 0 only when all three lower propagate bits are 1
+  assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); // carry out of the lower three bits (bit `DIVLEN has no carry in)
+  assign sign = p[`DIVLEN+3] ^ cout;           // top bit of the 4-bit sum ps + pc
+/*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
+			  (ps[52]^pc[52]));
+  assign #1 sign = (ps[55]^pc[55])^
+      (ps[54] & pc[54] | ((ps[54]^pc[54]) &
+			    (ps[53]&pc[53] | ((ps[53]^pc[53]) &
+						(ps[52]&pc[52]))))); */
+
+  // Produce quotient = +1, 0, or -1 (-1 is implied when neither qp nor qz is set)
+  assign qp = magnitude & ~sign;
+  assign qz = ~magnitude;
+//   assign #1 qn = magnitude & sign;
+endmodule
+
+module qsel4 (
+	input logic [`DIVLEN+3:0] D,           // only bits [`DIVLEN-1:`DIVLEN-3] are read here
+	input logic [`DIVLEN+3:0] WS, WC,      // only the top 8 bits of each are read here
+	output logic [3:0] q                   // selected digit, one of 1000/0100/0000/0010/0001
+);
+	logic [6:0] Wmsbs;                     // upper 7 bits of PreWmsbs
+	logic [7:0] PreWmsbs;                  // 8-bit sum of the top 8 bits of WC and WS (carry out dropped)
+	logic [2:0] Dmsbs;                     // the three bits of D just below bit `DIVLEN
+	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
+	assign Wmsbs = PreWmsbs[7:1];
+	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
+	// D = 0001.xxx...
+	// Dmsbs = |   |
+  // W =      xxxx.xxx...
+	// Wmsbs = |        |
+
+	logic [3:0] QSel4[1023:0];             // lookup table: 8 values of Dmsbs x 128 values of Wmsbs
+
+  always_comb begin                      // fills QSel4 from the per-d thresholds below; reads no signals
+    integer d, w, i, w2;
+    for(d=0; d<8; d++)
+      for(w=0; w<128; w++)begin
+        i = d*128+w;                     // same layout as the {Dmsbs,Wmsbs} index used for the lookup
+        w2 = w-128*(w>=64); // reinterpret the 7-bit w as two's complement (-64..63)
+        case(d)
+          0: if($signed(w2)>=$signed(12))      QSel4[i] = 4'b1000; // +2 (digit meaning per the otfc4 branch comments)
+            else if(w2>=4)   QSel4[i] = 4'b0100; // +1
+            else if(w2>=-4)  QSel4[i] = 4'b0000; //  0
+            else if(w2>=-13) QSel4[i] = 4'b0010; // -1
+            else            QSel4[i] = 4'b0001; // -2
+          1: if(w2>=14)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-15) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          2: if(w2>=15)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-16) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          3: if(w2>=16)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-18) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          4: if(w2>=18)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          5: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          6: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-22) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          7: if(w2>=24)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-24) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+        endcase
+      end
+  end
+	assign q = QSel4[{Dmsbs,Wmsbs}];       // look up the digit for the current Dmsbs and Wmsbs
+	
+endmodule
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
new file mode 100644
index 00000000..9e031511
--- /dev/null
+++ b/pipelined/src/fpu/srt.sv
@@ -0,0 +1,259 @@
+///////////////////////////////////////////
+// srt.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu 
+// Modified:13 January 2022
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module srt(
+  input  logic clk,
+  input  logic DivStart, 
+  input  logic DivBusy, 
+  input logic  [`FMTBITS-1:0] FmtE,
+  input  logic [`NE-1:0] Xe, Ye,
+  input  logic XZeroE, YZeroE, 
+  input logic [`DIVLEN-1:0] X,
+  input logic [`DIVLEN-1:0] Dpreproc,
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input logic NegSticky,
+  output logic [`QLEN-1-(`RADIX/4):0] Quot,
+  output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
+  output logic [`DIVLEN+3:0]  StickyWSA,
+  output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
+  output logic  [`NE+1:0] DivCalcExpM,
+  output logic [`XLEN-1:0] Rem
+);
+
+
+ /* verilator lint_off UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WCA[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WS[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0]  WC[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] Q[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
+  logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
+ /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSN, WCN;
+  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
+  logic [`NE+1:0] DivCalcExp;
+  logic [$clog2(`XLEN+1)-1:0] intExp;
+  logic           intSign;
+  logic [`QLEN-1:0] QMMux;
+
+  // Top Muxes and Registers
+  // When start is asserted, the inputs are loaded into the divider.
+  // Otherwise, the divisor is retained and the partial remainder
+  // is fed back for the next iteration.
+  //  - when the start signal is asserted X and 0 are loaded into WS and WC
+  //  - otherwise load WSA into the flipflop
+  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
+  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
+  if (`RADIX == 2) begin : nextw
+    assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
+    assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0};
+  end else begin
+    assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+    assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  end
+
+  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  flopen   #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
+  mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  flopen   #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
+  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
+  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
+
+
+  // Divisor Selections
+  // - choose the negative version of what's being selected
+  assign DBar = ~D;
+  if(`RADIX == 4) begin : d2
+    assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
+    assign D2 = {D[`DIVLEN+2:0], 1'b0};
+  end
+
+  genvar i;
+  generate  // unrolled chain of DIVCOPIES divinteration stages; stage i's outputs feed stage i+1 combinationally
+    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
+      divinteration divinteration(.D, .DBar, .D2, .DBar2, 
+      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
+      if(i<(`DIVCOPIES-1)) begin // every stage except the last drives the next stage's inputs
+        if (`RADIX==2)begin 
+          assign WS[i+1] = {WSA[i][`DIVLEN+2:0], 1'b0}; // radix 2: shift left by 1, keeping all DIVLEN+4 bits (same slice as NextWSN)
+          assign WC[i+1] = {WCA[i][`DIVLEN+2:0], 1'b0}; // a [DIVLEN+1:0] slice here is one bit short and zero-extends, clearing the MSB read by qsel2
+        end else begin
+          assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; // radix 4: shift left by 2
+          assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
+        end
+        assign Q[i+1] = QNext[i];   // pass the updated quotient registers to the next stage
+        assign QM[i+1] = QMNext[i];
+      end
+    end
+  endgenerate
+
+  // if starting a new division set Q to 0 and QM to -1
+  mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
+  flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
+  flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
+
+  assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
+  assign FirstWS = WS[0];
+  assign FirstWC = WC[0];
+  if(`RADIX==2)
+    if (`DIVCOPIES == 1)
+      assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0};
+    else
+      assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
+
+  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
+
+endmodule
+
+////////////////
+// Submodules //
+////////////////
+
+ /* verilator lint_off UNOPTFLAT */
+module divinteration ( // one divider stage: digit selection (qsel), divisor-multiple select (Dsel), carry-save update (csa), quotient update (otfc)
+  input logic [`DIVLEN+3:0] D,
+  input logic [`DIVLEN+3:0]  DBar, D2, DBar2,
+  input logic [`QLEN-1:0] Q, QM,
+  input logic [`DIVLEN+3:0]  WS, WC,
+  output logic [`QLEN-1:0] QNext, QMNext, 
+  output logic [`DIVLEN+3:0]  WSA, WCA
+);
+ /* verilator lint_on UNOPTFLAT */
+
+  logic [`DIVLEN+3:0]  Dsel;
+  logic [3:0]     q;
+  logic qp, qz;//, qn;
+
+  // Quotient Selection logic
+  // Radix 2: given the partial remainder, select a quotient digit of +1, 0, or -1 (qp, qz; the qn output is commented out)
+  // Radix 4: q encoding (matches the Dsel case statement below):
+	// 1000 = +2
+	// 0100 = +1
+	// 0000 =  0
+	// 0010 = -1
+	// 0001 = -2
+  if(`RADIX == 2) begin : qsel
+    qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn);
+  end else begin
+    qsel4 qsel4(.D, .WS, .WC, .q);
+  end
+
+  if(`RADIX == 2) begin : dsel
+    assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); // all zeros when qz; DBar when qp; D otherwise
+  end else begin
+    always_comb
+      case (q)
+        4'b1000: Dsel = DBar2;
+        4'b0100: Dsel = DBar;
+        4'b0000: Dsel = '0;
+        4'b0010: Dsel = D;
+        4'b0001: Dsel = D2;
+        default: Dsel = 'x;
+      endcase
+  end
+  // Partial remainder update in carry-save form
+  //  WSA, WCA = WS + WC - qD
+  if (`RADIX == 2) begin : csa
+    csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); // cin = qp: set whenever the DBar input is selected
+  end else begin
+    csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); // cin set for q = +1 or +2, i.e. when DBar or DBar2 is selected
+  end
+
+  if (`RADIX == 2) begin : otfc
+    otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
+  end else begin
+    otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
+  end
+
+endmodule
+
+
+/////////
+// csa //
+/////////
+module csa #(parameter N=69) (
+  input  logic [N-1:0] in1, in2, in3, 
+  input  logic         cin, 
+  output logic [N-1:0] out1, out2
+);
+
+  // This block adds in1, in2, in3, and cin to produce 
+  // a result out1 / out2 in carry-save redundant form.
+  // cin is just added to the least significant bit and
+  // is required to handle adding a negative divisor.
+  // Fortunately, the carry (out2) is shifted left by one
+  // bit, leaving room in the least significant bit to 
+  // insert cin.
+
+  assign out1 = in1 ^ in2 ^ in3; // sum word: bitwise XOR of the three inputs
+  assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
+		    (in2[N-2:0] & in3[N-2:0]), cin}; // carry word: majority of bits N-2..0 moved up one place, cin in bit 0 (carry out of bit N-1 is not kept)
+endmodule
+
+module expcalc( // computes DivCalcExp = Xe - XZeroCnt - Ye + YZeroCnt + Bias, forced to 0 when XZeroE is set
+  input logic  [`FMTBITS-1:0] FmtE,
+  input  logic [`NE-1:0] Xe, Ye,
+  input logic XZeroE, 
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  output logic  [`NE+1:0] DivCalcExp
+  );
+    logic [`NE-2:0] Bias; // bias constant chosen by FmtE below, cast to NE-1 bits
+    
+    if (`FPSIZES == 1) begin
+        assign Bias = (`NE-1)'(`BIAS); 
+
+    end else if (`FPSIZES == 2) begin
+        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (FmtE)
+                `FMT: Bias  =  (`NE-1)'(`BIAS);
+                `FMT1: Bias = (`NE-1)'(`BIAS1);
+                `FMT2: Bias = (`NE-1)'(`BIAS2);
+                default: Bias = 'x;
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (FmtE) // all four 2-bit encodings are listed, so there is no default arm
+                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
+                2'h1: Bias =  (`NE-1)'(`D_BIAS);
+                2'h0: Bias =  (`NE-1)'(`S_BIAS);
+                2'h2: Bias =  (`NE-1)'(`H_BIAS);
+            endcase
+    end
+    // correct exponent for denormalized input's normalization shifts; every operand is zero-extended to NE+2 bits and the result is ANDed with ~XZeroE
+    assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    endmodule
\ No newline at end of file

From 6e1d4ec4edeccec2d211dee45da04bdce3a5af2d Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 16 Jul 2022 17:43:31 -0700
Subject: [PATCH 30/36] restored intPending logic to be sticky for PLIC

---
 pipelined/src/uncore/plic_apb.sv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelined/src/uncore/plic_apb.sv b/pipelined/src/uncore/plic_apb.sv
index f83033c4..51e94d7f 100644
--- a/pipelined/src/uncore/plic_apb.sv
+++ b/pipelined/src/uncore/plic_apb.sv
@@ -172,8 +172,8 @@ module plic_apb (
   end
 
   // pending interrupt requests
-  //assign nextIntPending = (intPending | requests) & ~intInProgress; // 
-  assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
+  assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots.  Confirmed to boot successfully.
+  //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion
   flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending);
 
   // context-dependent signals

From 2a965cf634685bee8115063a08612a27de60b9e0 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sun, 17 Jul 2022 01:39:57 +0000
Subject: [PATCH 31/36] Don't delete hdl directory at end of run

---
 synthDC/Makefile          | 2 +-
 synthDC/extractSummary.py | 7 +++++++
 synthDC/scripts/synth.tcl | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/synthDC/Makefile b/synthDC/Makefile
index 369529e3..98b71942 100755
--- a/synthDC/Makefile
+++ b/synthDC/Makefile
@@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1)
 	cp -f ../pipelined/regression/power.saif .
 endif
 	dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out
-	rm -rf $(OUTPUTDIR)/hdl
+#	rm -rf $(OUTPUTDIR)/hdl
 	rm -rf $(OUTPUTDIR)/WORK
 	rm -rf $(OUTPUTDIR)/alib-52
 
diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py
index 93363a06..29e1c802 100755
--- a/synthDC/extractSummary.py
+++ b/synthDC/extractSummary.py
@@ -78,6 +78,13 @@ def freqPlot(tech, width, config):
     ''' plots delay, area for syntheses with specified tech, module, width
     '''
 
+    current_directory = os.getcwd()
+    final_directory = os.path.join(current_directory, 'plots/wally')
+#    if not os.path.exists(final_directory):
+#        os.makedirs(final_directory)
+#    os.chdir(final_directory)
+
+
     freqsL, delaysL, areasL = ([[], []] for i in range(3))
     for oneSynth in allSynths:
         if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special):
diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index 251522dc..9b72849f 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true"
 # Due to parameterized Verilog must use analyze/elaborate and not 
 # read_verilog/vhdl (change to pull in Verilog and/or VHDL)
 #
-set alib_library_analysis_path ./$outputDir
+#set alib_library_analysis_path ./$outputDir
 define_design_lib WORK -path ./$outputDir/WORK
 analyze -f sverilog -lib WORK $my_verilog_files
 elaborate $my_toplevel -lib WORK 

From 3815f197633a140c4237566e460cf8690ec86f88 Mon Sep 17 00:00:00 2001
From: James Stine <jstine@yukari.ecen.okstate.edu>
Date: Sun, 17 Jul 2022 11:06:30 -0500
Subject: [PATCH 32/36] Add import os in extractSummary.py

---
 synthDC/extractSummary.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py
index 29e1c802..d4f86fb3 100755
--- a/synthDC/extractSummary.py
+++ b/synthDC/extractSummary.py
@@ -11,6 +11,7 @@ import numpy as np
 from ppa.ppaAnalyze import noOutliers
 from matplotlib import ticker
 import argparse
+import os
 
 
 def synthsintocsv():

From 2753699fb2737d39027f7801f5686b9722f1a7f2 Mon Sep 17 00:00:00 2001
From: James Stine <jstine@yukari.ecen.okstate.edu>
Date: Sun, 17 Jul 2022 13:00:44 -0500
Subject: [PATCH 33/36] Add back extractSummary mkdir plots

---
 synthDC/extractSummary.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py
index d4f86fb3..978365b1 100755
--- a/synthDC/extractSummary.py
+++ b/synthDC/extractSummary.py
@@ -60,6 +60,7 @@ def synthsintocsv():
             writer.writerow([width, config, special, tech, freq, delay, area])
     file.close()
 
+	
 def synthsfromcsv(filename):
     Synth = namedtuple("Synth", "width config special tech freq delay area")
     with open(filename, newline='') as csvfile:
@@ -75,16 +76,15 @@ def synthsfromcsv(filename):
             allSynths[i] = Synth(*allSynths[i])
     return allSynths
 
+
 def freqPlot(tech, width, config):
     ''' plots delay, area for syntheses with specified tech, module, width
     '''
 
     current_directory = os.getcwd()
     final_directory = os.path.join(current_directory, 'plots/wally')
-#    if not os.path.exists(final_directory):
-#        os.makedirs(final_directory)
-#    os.chdir(final_directory)
-
+    if not os.path.exists(final_directory):
+        os.makedirs(final_directory)
 
     freqsL, delaysL, areasL = ([[], []] for i in range(3))
     for oneSynth in allSynths:
@@ -159,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False):
 
     return fig
 
+
 def plotFeatures(tech, width, config):
     delays, areas, labels = ([] for i in range(3))
     freq = techdict[tech].targfreq
@@ -176,7 +177,8 @@ def plotFeatures(tech, width, config):
     titlestr = tech+'_'+width+config
     plt.title(titlestr)
     plt.savefig('./plots/wally/features_'+titlestr+'.png')
-    
+
+	
 def plotConfigs(tech, special=''):
     delays, areas, labels = ([] for i in range(3))
     freq = techdict[tech].targfreq
@@ -215,7 +217,8 @@ def normAreaDelay(special=''):
     ax.set_ylabel('Area (add32)')        
     ax.legend(handles = fullLeg, loc='upper left')
     plt.savefig('./plots/wally/normAreaDelay.png')
-    
+
+	
 def addFO4axis(fig, ax, tech):
     fo4 = techdict[tech].fo4
 

From 5bb14788596d9f1cc53803f6d1b1fac06ee1fd93 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 18 Jul 2022 17:31:17 +0000
Subject: [PATCH 34/36] renamed signals in code to match book

---
 pipelined/src/fpu/divshiftcalc.sv    |  28 ++--
 pipelined/src/fpu/fcvt.sv            |   2 +-
 pipelined/src/fpu/flags.sv           |   6 +-
 pipelined/src/fpu/fmashiftcalc.sv    |  26 ++--
 pipelined/src/fpu/fpu.sv             |   6 +-
 pipelined/src/fpu/postprocess.sv     |  46 +++----
 pipelined/src/fpu/resultsign.sv      |  15 +--
 pipelined/src/fpu/round.sv           | 186 +++++++++++++--------------
 pipelined/src/fpu/roundsign.sv       |   8 +-
 pipelined/src/fpu/shiftcorrection.sv |  16 +--
 pipelined/testbench/testbench-fp.sv  |   6 +-
 11 files changed, 171 insertions(+), 174 deletions(-)

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index af321b25..3fbc9419 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -1,10 +1,10 @@
 `include "wally-config.vh"
 
 module divshiftcalc(
-    input logic  [`QLEN-1-(`RADIX/4):0] Quot,
+    input logic  [`QLEN-1-(`RADIX/4):0] DivQm,
     input logic  [`FMTBITS-1:0] Fmt,
     input logic [`DURLEN-1:0] DivEarlyTermShift,
-    input logic [`NE+1:0] DivCalcExp,
+    input logic [`NE+1:0] DivQe,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
     output logic DivResDenorm,
@@ -14,21 +14,21 @@ module divshiftcalc(
 
     // is the result denromalized
     // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
-    assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
+    assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
 
     // if the result is denormalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  .00xxxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = +1
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  .00xxxxxxxxxxxxx... << DivQe+NF+1  Exp = +1
     //  .0000xxxxxxxxxxx... >> 1                Exp = 1
-    // Left shift amount  = DivCalcExp+NF+1-1
-    assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
+    // Left shift amount  = DivQe+NF+1-1
+    assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
     // if the result is normalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  00000000.xxxxxxx... << NF               Exp = DivCalcExp+1
-    //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
-    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  00000000.xxxxxxx... << NF               Exp = DivQe+1
+    //  00000000x.xxxxxx... << NF               Exp = DivQe (extra shift done afterwards)
+    //  00000000xx.xxxxx... << 1?               Exp = DivQe-1 (determined after)
     // inital Left shift amount  = NF
     // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
     assign NormShift = (`NE+2)'(`NF);
@@ -36,6 +36,6 @@ module divshiftcalc(
     // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
     assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};
 
-    assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
 
 endmodule
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 4820cf28..b9932523 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -127,7 +127,7 @@ module fcvt (
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
     assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
                     ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}};
+                              (LeadingZeros);
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 4e16bc96..71f2a919 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -48,10 +48,10 @@ module flags(
     input logic                 DivOp,                  // conversion opperation?
     input logic                 FmaOp,                  // Fma opperation?
     input logic  [`NE+1:0]      FullRe,             // Re with bits to determine sign and overflow
-    input logic  [`NE+1:0]      Nexp,               // exponent of the normalized sum
+    input logic  [`NE+1:0]      Me,               // exponent of the normalized sum
     input logic  [1:0]          CvtNegResMsbs,             // the negitive integer result's most significant bits
     input logic                 FmaAs, FmaPs,        // the product and modified Z signs
-    input logic                 R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
+    input logic                 R, UfL, S, UfPlus1, // bits used to determine rounding
     output logic                DivByZero,
     output logic                IntInvalid, Invalid, Overflow, // flags used to select the res
     output logic [4:0]          PostProcFlg // flags
@@ -127,7 +127,7 @@ module flags(
     //                  |                    |                    |                                      |                     and if the result is not exact
     //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
     //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
+    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index ae974eb0..a6c1a1c6 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -35,7 +35,7 @@ module fmashiftcalc(
     input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
-    output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    output logic [`NE+1:0]              FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
     output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
@@ -57,28 +57,28 @@ module fmashiftcalc(
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
-        assign FmaConvNormSumExp = NormSumExp;
+        assign FmaNe = NormSumExp;
 
     end else if (`FPSIZES == 2) begin
-        assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+        assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
 
     end else if (`FPSIZES == 3) begin
         always_comb begin
             case (Fmt)
-                `FMT: FmaConvNormSumExp = NormSumExp;
-                `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
-                `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
-                default: FmaConvNormSumExp = {`NE+2{1'bx}};
+                `FMT: FmaNe = NormSumExp;
+                `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+                `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
+                default: FmaNe = {`NE+2{1'bx}};
             endcase
         end
 
     end else if (`FPSIZES == 4) begin
         always_comb begin
             case (Fmt)
-                2'h3: FmaConvNormSumExp = NormSumExp;
-                2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
-                2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
-                2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+                2'h3: FmaNe = NormSumExp;
+                2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
+                2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
+                2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
             endcase
         end
 
@@ -144,11 +144,11 @@ module fmashiftcalc(
     //      - if kill prod dont add to exp
 
     // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
+    // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 1bbd0aea..65be2997 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -381,10 +381,10 @@ module fpu (
    assign FpLoadStoreM = FResSelM[1];
 
    postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
-                           .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
-                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
+                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM),
+                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
                            .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
-                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
+                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
                            .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index e0eb50ac..f9ccd255 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -48,17 +48,17 @@ module postprocess (
     input logic                             FmaPs,      // the product's sign
     input logic  [`NE+1:0]                  FmaPe,       // Product exponent
     input logic  [3*`NF+5:0]                FmaSm,       // the positive sum
-    input logic                             FmaZmSticky,  // sticky bit that is calculated during alignment
+    input logic                             FmaZmS,  // sticky bit that is calculated during alignment
     input logic                             FmaKillProd,      // set the product to zero before addition if the product is too small to matter
     input logic                             FmaNegSum,    // was the sum negitive
     input logic                             FmaInvA,      // do you invert Z
     input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
     //divide signals
     input logic  [`DURLEN-1:0]              DivEarlyTermShift,
-    input logic                             DivSticky,
+    input logic                             DivS,
     input logic                             DivDone,
-    input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`QLEN-1-(`RADIX/4):0]                Quot,
+    input logic  [`NE+1:0]                  DivQe,
+    input logic  [`QLEN-1-(`RADIX/4):0]                DivQm,
     // conversion signals
     input logic                             CvtCs,     // the result's sign
     input logic  [`NE:0]                    CvtCe,    // the calculated expoent
@@ -77,9 +77,9 @@ module postprocess (
     logic Ws;
     logic [`NF-1:0] Rf; // Result fraction
     logic [`NE-1:0] Re;  // Result exponent
-    logic Nsgn;
-    logic [`NE+1:0] Nexp;
-    logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
+    logic Ms;
+    logic [`NE+1:0] Me;
+    logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
     logic [`NE+1:0] FullRe;  // Re with bits to determine sign and overflow
     logic S;           // S bit
     logic UfPlus1;                    // do you add one (for determining underflow flag)
@@ -89,19 +89,19 @@ module postprocess (
     logic [`NORMSHIFTSZ-1:0] Shifted;    // the shifted result
     logic Plus1;      // add one to the final result?
     logic IntInvalid, Overflow, Invalid; // flags
-    logic UfLSBRes;
+    logic UfL;
     logic [`FMTBITS-1:0] OutFmt;
     // fma signals
     logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
     logic FmaSZero;        // is the sum zero
     logic [3*`NF+8:0] FmaShiftIn;        // shift input
-    logic [`NE+1:0] FmaConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [`NE+1:0] FmaNe;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
     // division singals
     logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
     logic [`NORMSHIFTSZ-1:0] DivShiftIn;
-    logic [`NE+1:0] DivCorrExp;
+    logic [`NE+1:0] Qe;
     logic DivByZero;
     logic DivResDenorm;
     logic [`NE+1:0] DivDenormShift;
@@ -150,9 +150,9 @@ module postprocess (
 
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
                           .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSel)
@@ -181,9 +181,9 @@ module postprocess (
     
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
-    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
-                                .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
-                                .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
+    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
+                                .DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
+                                .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -197,19 +197,19 @@ module postprocess (
 
                           
     roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, 
-                          .Xs, .Ys, .CvtCs, .Nsgn);
+                          .Xs, .Ys, .CvtCs, .Ms);
 
-    round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
-                .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
-                .DivSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp);
+    round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
+                .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
+                .DivS, .DivDone,
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
     ///////////////////////////////////////////////////////////////////////////////
 
     resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
-                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws);
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
@@ -218,8 +218,8 @@ module postprocess (
     flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                 .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
                 .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
-                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
-                .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+                .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
+                .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Select the result
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index e6de0c18..e1ea5e41 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -39,28 +39,25 @@ module resultsign(
     input logic         Mult,
     input logic         R,
     input logic         S,
-    input logic         Nsgn,
+    input logic         Ms,
     output logic        Ws
 );
 
-    logic ZeroSgn;
-    logic InfSgn;
-    logic Underflow;
-    // logic ResultSgnTmp;
+    logic Zeros;
+    logic Infs;
 
     // Determine the sign if the sum is zero
     //      if cancelation then 0 unless round to -infinity
     //      if multiply then Psgn
     //      otherwise psign
-    assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
-    assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
 
 
     // is the result negitive
     //  if p - z is the Sum negitive
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
-    assign InfSgn = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn;
+    assign Infs = ZInf ? FmaAs : FmaPs;
+    assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 38bacce0..6132dba4 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -46,29 +46,29 @@ module round(
     input logic  [1:0]              PostProcSel,
     input logic                     CvtResDenormUf,
     input logic                     CvtResUf,
-    input logic  [`CORRSHIFTSZ-1:0] Nfrac,
-    input logic                     FmaZmSticky,  // addend's sticky bit
+    input logic  [`CORRSHIFTSZ-1:0] Mf,
+    input logic                     FmaZmS,  // addend's sticky bit
     input logic  [`NE+1:0]          FmaSe,         // exponent of the normalized sum
-    input logic                     Nsgn,      // the result's sign
+    input logic                     Ms,      // the result's sign
     input logic  [`NE:0]            CvtCe,    // the calculated expoent
-    input logic  [`NE+1:0]          DivCorrExp,    // the calculated expoent
-    input logic                     DivSticky,             // sticky bit
+    input logic  [`NE+1:0]          Qe,    // the calculated exponent
+    input logic                     DivS,             // sticky bit
     output logic                    UfPlus1,  // do you add or subtract on from the result
     output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
     output logic [`NF-1:0]          Rf,         // Result fraction
     output logic [`NE-1:0]          Re,          // Result exponent
     output logic                    S,             // sticky bit
-    output logic [`NE+1:0]          Nexp,
+    output logic [`NE+1:0]          Me,
     output logic                    Plus1,
-    output logic                    R, UfLSBRes // bits needed to calculate rounding
+    output logic                    R, UfL // bits needed to calculate rounding
 );
-    logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
+    logic           L;         // bit used for rounding - least significant bit of the normalized sum
     logic           UfCalcPlus1; 
-    logic           NormSumSticky;  // normalized sum's sticky bit
-    logic           UfSticky;   // sticky bit for underlow calculation
+    logic           NormS;  // normalized sum's sticky bit
+    logic           UfS;   // sticky bit for underflow calculation
     logic [`NF-1:0] RoundFrac;
     logic           FpRes, IntRes;
-    logic           UfRound;
+    logic           UfR;
     logic           FpRound, FpLSBRes, FpUfRound;
     logic           CalcPlus1, FpPlus1;
     logic [`FLEN:0] RoundAdd;           // how much to add to the result
@@ -114,61 +114,61 @@ module round(
     //      |    NF     |1|1|
     //                     ^    ^ if floating point result
     //                     ^ if not an FMA result
-        if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
     //     2: NF > XLEN
-        if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 2) begin
         // XLEN is either 64 or 32
         // so half and single are always smaller then XLEN
 
         // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
         // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 3) begin
         // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
         // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
     end else if (`FPSIZES == 4) begin
         // Quad precision will always be greater than XLEN
         // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
         // 3: NF   > NF1  > XLEN
         // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
 
     end
     
@@ -176,37 +176,37 @@ module round(
 
     // only add the Addend sticky if doing an FMA opperation
     //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
+    assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
     
     // determine round and LSB of the rounded value
     //      - underflow round bit is used to determint the underflow flag
     if (`FPSIZES == 1) begin
-        assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-        assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-        assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+        assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+        assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+        assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
 
     end else if (`FPSIZES == 2) begin
-        assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
-        assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
-        assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
+        assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
+        assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
+        assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
 
     end else if (`FPSIZES == 3) begin
         always_comb
             case (OutFmt)
                 `FMT: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
                 end
                 `FMT1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
                 end
                 `FMT2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
                 end
                 default: begin
                     FpRound = 1'bx;
@@ -218,55 +218,55 @@ module round(
         always_comb
             case (OutFmt)
                 2'h3: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
                 end
                 2'h1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
                 end
                 2'h0: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
                 end
                 2'h2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
                 end
             endcase
     end
 
-    assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
-    assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
-    assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
+    assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
+    assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
+    assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
 
     // used to determine underflow flag
-    assign UfLSBRes = FpRound;
+    assign UfL = FpRound;
     // determine sticky
-    assign S = UfSticky | UfRound;
+    assign S = UfS | UfR;
 
 
     always_comb begin
         // Determine if you add 1
         case (Frm)
-            3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even
+            3'b000: CalcPlus1 = R & (S| L);//round to nearest even
             3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = Nsgn;//round down
-            3'b011: CalcPlus1 = ~Nsgn;//round up
+            3'b010: CalcPlus1 = Ms;//round down
+            3'b011: CalcPlus1 = ~Ms;//round up
             3'b100: CalcPlus1 = R;//round to nearest max magnitude
             default: CalcPlus1 = 1'bx;
         endcase
         // Determine if you add 1 (for underflow flag)
         case (Frm)
-            3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even
+            3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
             3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = Nsgn;//round down
-            3'b011: UfCalcPlus1 = ~Nsgn;//round up
-            3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude
+            3'b010: UfCalcPlus1 = Ms;//round down
+            3'b011: UfCalcPlus1 = ~Ms;//round up
+            3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
             default: UfCalcPlus1 = 1'bx;
         endcase
    
@@ -275,7 +275,7 @@ module round(
     // If an answer is exact don't round
     assign Plus1 = CalcPlus1 & (S | R);
     assign FpPlus1 = Plus1&~(ToInt&CvtOp);
-    assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
+    assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky
 
     // Compute rounded result
     if (`FPSIZES == 1) begin
@@ -295,19 +295,19 @@ module round(
         assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
 
     // determine the result to be roundned
-    assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+    assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
     
     always_comb
         case(PostProcSel)
-            2'b10: Nexp = FmaSe; // fma
-            2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
-            2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
-            default: Nexp = '0; 
+            2'b10: Me = FmaSe; // fma
+            2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
+            2'b01: Me = DivDone ? Qe : '0; // divide
+            default: Me = '0; 
         endcase
 
     // round the result
     //      - if the fraction overflows one should be added to the exponent
-    assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd;
+    assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
     assign Re = FullRe[`NE-1:0];
 
 
diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv
index 22686b24..55e322bc 100644
--- a/pipelined/src/fpu/roundsign.sv
+++ b/pipelined/src/fpu/roundsign.sv
@@ -38,11 +38,11 @@ module roundsign(
     input logic         DivOp,
     input logic         CvtOp,
     input logic         CvtCs,
-    output logic        Nsgn
+    output logic        Ms
 );
 
     logic FmaResSgnTmp;
-    logic DivSgn;
+    logic Qs;
 
     // is the result negitive
     //  if p - z is the Sum negitive
@@ -52,9 +52,9 @@ module roundsign(
 
     // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
 
-    assign DivSgn = Xs^Ys;
+    assign Qs = Xs^Ys;
 
     // Sign for rounding calulation
-    assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
+    assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index ecfd9ba0..71a2393a 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -33,13 +33,13 @@ module shiftcorrection(
     input logic                     FmaOp,
     input logic                     DivOp,
     input logic                     DivResDenorm,
-    input logic  [`NE+1:0]          DivCalcExp,
+    input logic  [`NE+1:0]          DivQe,
     input logic  [`NE+1:0]          DivDenormShift,
-    input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic  [`NE+1:0]          FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     input logic                     FmaSZero,
-    output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
-    output logic [`NE+1:0]          DivCorrExp,
+    output logic [`CORRSHIFTSZ-1:0] Mf,         // the shifted sum before LZA correction
+    output logic [`NE+1:0]          Qe,
     output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
@@ -53,16 +53,16 @@ module shiftcorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+    assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
+    assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
     // the quotent is in the range [.5,2) if there is no early termination
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
+    assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index a95a6624..b90c3d3d 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -679,13 +679,13 @@ module testbenchfp;
           .Pe, .ZmSticky, .KillProd); 
               
   postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
-              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
+              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky),
               .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
+              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
               .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
               .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   

From 921debf930d74f7c572ab708886335938cd0449a Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 18 Jul 2022 17:51:18 +0000
Subject: [PATCH 35/36] removed underflow from inexact calculation

---
 pipelined/regression/sim-wally   | 2 +-
 pipelined/src/fpu/flags.sv       | 2 +-
 pipelined/testbench/testbench.sv | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally
index 51c8b3ed..6163ab8b 100755
--- a/pipelined/regression/sim-wally
+++ b/pipelined/regression/sim-wally
@@ -1,2 +1,2 @@
-vsim -do "do wally-pipelined.do rv64gc arch64d"
+vsim -do "do wally-pipelined.do rv32gc wally32periph"
 
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 71f2a919..6b1bc638 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -131,7 +131,7 @@ module flags(
 
     // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
     //      - Don't set the underflow flag if an underflowed res isn't outputed
-    assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
+    assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);
 
     //                  if the res is too small to be represented and not 0
     //                  |                                     and if the res is not invalid (outside the integer bounds)
diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index 6d537b14..0fb5f5e6 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -114,7 +114,7 @@ logic [3:0] dummy;
         "arch32f":      if (`F_SUPPORTED) tests = arch32f;
         "imperas32i":                     tests = imperas32i;
         "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
-        "wally32d":     if (`D_SUPPORTED) tests = wally32d;
+        // "wally32d":     if (`D_SUPPORTED) tests = wally32d;
         "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
         "wally32a":     if (`A_SUPPORTED) tests = wally32a;
         "imperas32c":   if (`C_SUPPORTED) tests = imperas32c;

From 3f5a5e10936714c1dd7eca07a07269218a0d5e2c Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Mon, 18 Jul 2022 12:13:15 -0700
Subject: [PATCH 36/36] added the sail change to spike to let it all run
 normally

---
 tests/riscof/spike/riscof_spike.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py
index 88a6269e..fd429395 100644
--- a/tests/riscof/spike/riscof_spike.py
+++ b/tests/riscof/spike/riscof_spike.py
@@ -108,7 +108,7 @@ class spike(pluginTemplate):
 
       #TODO: The following assumes you are using the riscv-gcc toolchain. If
       #      not please change appropriately
-      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
+      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))
 
     def runTests(self, testList):
 
@@ -158,7 +158,12 @@ class spike(pluginTemplate):
 	  # echo statement.
           if self.target_run:
             # set up the simulation command. Template is for spike. Please change.
-            simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
+            if ('NO_SAIL=True' in testentry['macros']):
+                # if the test can't run on SAIL, copy its reference output into the signature file instead of simulating
+                reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test))
+                simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
+            else:
+                simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
           else:
             simcmd = 'echo "NO RUN"'