From 5133b08161834738cb555762d6325fd8785400f0 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 21:38:04 +0000
Subject: [PATCH 1/6] generate qsel4 in verilog

---
 pipelined/regression/wave-fpu.do    |  3 ++
 pipelined/srt/srt-radix4.sv         | 52 ++++++++++++++++++++++++++++-
 pipelined/testbench/testbench-fp.sv |  6 +++-
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 906eb2560..60835ef67 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -22,3 +22,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Testbench} -noupdate /testbenchfp/*
+add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index 6894a0f9c..52bd4c200 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -164,7 +164,57 @@ module qsel4 (
 	// Wmsbs = |        |
 
 	logic [3:0] QSel4[1023:0];
-	initial $readmemh("../srt/qsel4.dat", QSel4);
+
+  initial begin // fill all 1024 QSel4 entries procedurally (8 values of d x 128 values of w)
+    integer d, w, i, w2; // d, w: loop fields of the table index; i: flattened index; w2: w as a signed value
+    for(d=0; d<8; d++)
+      for(w=0; w<128; w++)begin
+        i = d*128+w; // d sits above the low 7 bits used by w; presumably mirrors the {Dmsbs,Wmsbs} read address - confirm
+        w2 = w-128*(w>=64); // reinterpret the 7-bit pattern w as two's complement: 64..127 become -64..-1
+        case(d) // every d has its own set of four descending thresholds on w2
+          0: if($signed(w2)>=$signed(12))      QSel4[i] = 4'b1000; // w2 is already a signed integer, so this is the same test as w2>=12
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-4)  QSel4[i] = 4'b0000; 
+            else if(w2>=-13) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          1: if(w2>=14)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-15) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          2: if(w2>=15)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-16) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          3: if(w2>=16)      QSel4[i] = 4'b1000;
+            else if(w2>=4)   QSel4[i] = 4'b0100; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; 
+            else if(w2>=-18) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          4: if(w2>=18)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          5: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=6)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-20) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          6: if(w2>=20)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-22) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+          7: if(w2>=24)      QSel4[i] = 4'b1000;
+            else if(w2>=8)   QSel4[i] = 4'b0100; 
+            else if(w2>=-8)  QSel4[i] = 4'b0000; 
+            else if(w2>=-24) QSel4[i] = 4'b0010; 
+            else            QSel4[i] = 4'b0001; 
+        endcase
+      end // w loop
+  end // initial
 	assign q = QSel4[{Dmsbs,Wmsbs}];
 	
 endmodule
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 70787b3cb..7a5514901 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -899,7 +899,7 @@ module readvectors (
 
   // apply test vectors on rising edge of clk
   // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(TestNum) begin
+  always @(VectorNum) begin
     #1; 
     AnsFlg = TestVector[4:0];
     DivStart = 1'b0;
@@ -971,6 +971,7 @@ module readvectors (
             X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
             Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
             Ans = TestVector[8+(`Q_LEN-1):8];
+            if (~clk) #5;
             DivStart = 1'b1; #10 // one clk cycle
             DivStart = 1'b0;
           end
@@ -978,6 +979,7 @@ module readvectors (
             X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
             Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
             Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
+            if (~clk) #5;
             DivStart = 1'b1; #10
             DivStart = 1'b0;
           end
@@ -985,6 +987,7 @@ module readvectors (
             X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
             Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
             Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
+            if (~clk) #5;
             DivStart = 1'b1; #10
             DivStart = 1'b0;
           end
@@ -992,6 +995,7 @@ module readvectors (
             X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
             Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
             Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
+            if (~clk) #5;
             DivStart = 1'b1; #10
             DivStart = 1'b0;
           end

From ded2631567773884c2339699225e28344b2b79eb Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Thu, 23 Jun 2022 14:39:53 -0700
Subject: [PATCH 2/6] Removed big64.txt reference, fixing a warning

---
 pipelined/src/generic/flop/bram1p1rw.sv | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv
index d0d3c40a8..51fe54214 100644
--- a/pipelined/src/generic/flop/bram1p1rw.sv
+++ b/pipelined/src/generic/flop/bram1p1rw.sv
@@ -54,10 +54,6 @@ module bram1p1rw
   logic [DATA_WIDTH-1:0] 			 RAM [(2**ADDR_WIDTH)-1:0];
   integer 							 i;
 
-  initial begin
-	$readmemh("big64.txt", RAM);
-  end
-
   always @ (posedge clk) begin
 	dout <= RAM[addr];    
 	if(we) begin

From b54d84195f868103408fafc88ea9b5b3b083b246 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 22:36:19 +0000
Subject: [PATCH 3/6] added radix-4 0/d handling

---
 pipelined/config/rv64fp/wally-config.vh |  2 +-
 pipelined/src/fpu/postprocess.sv        | 10 ++--
 pipelined/src/fpu/round.sv              |  1 +
 pipelined/srt/srt-radix4.sv             | 68 ++++++++++++++-----------
 pipelined/testbench/testbench-fp.sv     |  6 +--
 5 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index bcc791338..68b3b84c3 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -32,7 +32,7 @@
 `define DESIGN_COMPILER 0
 
 // RV32 or RV64: XLEN = 32 or 64
-`define XLEN 64
+`define XLEN 32
 
 // IEEE 754 compliance
 `define IEEE754 0
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 4b2870da4..9138f9dfd 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -59,7 +59,7 @@ module postprocess(
     input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
     input logic             IntZeroM,         // is the input zero
     input logic [1:0] PostProcSelM, // select result to be written to fp register
-    input logic [`DIVLEN-1:0]   Quot,
+    input logic [`DIVLEN+2:0]   Quot,
     output logic    [`FLEN-1:0]    PostProcResM,    // FMA final result
     output logic    [4:0]          PostProcFlgM,
     output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
@@ -84,6 +84,7 @@ module postprocess(
     logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt;   // normalization shift count
     logic [$clog2(`NORMSHIFTSZ)-1:0]  ShiftAmt;   // normalization shift count
+    logic [$clog2(`NORMSHIFTSZ)-1:0]  DivShiftAmt;
     logic [`NORMSHIFTSZ-1:0]            ShiftIn;        // is the sum zero
     logic [`NORMSHIFTSZ-1:0]    Shifted;    // the shifted result
     logic                   Plus1;      // add one to the final result?
@@ -137,6 +138,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
+    divshiftcalc divshiftcalc(.Quot, .DivShiftAmt);
 
     always_comb
         case(PostProcSelM)
@@ -149,8 +151,8 @@ module postprocess(
                 ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
             end
             2'b01: begin //div ***prob can take out
-                ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt};
-                ShiftIn =  {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}};
+                ShiftAmt = DivShiftAmt;
+                ShiftIn =  {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
             end
             default: begin 
                 ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
@@ -175,7 +177,7 @@ module postprocess(
 
     round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM,
                 .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
-                .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
+                .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Sign calculation
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 8e3b9fe4a..1fd471e9d 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -11,6 +11,7 @@ module round(
     input logic  [`FMTBITS-1:0] OutFmt,       // precision 1 = double 0 = single
     input logic  [2:0]          FrmM,       // rounding mode
     input logic                 FmaOp,
+    input logic                 DivOp,
     input logic [1:0] PostProcSelM,
     input logic                 CvtResDenormUfM,
     input logic                 ToInt,
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index 52bd4c200..8fd8d5419 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -34,14 +34,15 @@ module srtradix4 (
   input  logic clk,
   input  logic DivStart, 
   input  logic [`NE-1:0] XExpE, YExpE,
-  input  logic [`NF-1:0] XFrac, YFrac,
+  input  logic [`NF:0] XManE, YManE,
   input  logic [`XLEN-1:0] SrcA, SrcB,
+  input  logic XZeroE,
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       DivDone,
-  output logic [`DIVLEN-1:0] Quot,
+  output logic [`DIVLEN+2:0] Quot,
   output logic [`XLEN-1:0] Rem, // *** later handle integers
   output logic [`NE:0] DivCalcExpE
 );
@@ -49,14 +50,15 @@ module srtradix4 (
   // logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [3:0]     q;
   logic [`NE:0] DivCalcExp;
-  logic [`DIVLEN-1:0]  X, Dpreproc;
+  logic [`DIVLEN:0]    X;
+  logic [`DIVLEN-1:0]  Dpreproc;
   logic [`DIVLEN+3:0]  WS, WSA, WSN;
   logic [`DIVLEN+3:0]  WC, WCA, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
  
-  srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
+  srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -68,7 +70,7 @@ module srtradix4 (
   //  - otherwise load WSA into the flipflop
   //  *** what does N and A stand for?
   //  *** change shift amount for radix4
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN);
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
   mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
@@ -110,9 +112,9 @@ module srtradix4 (
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
   
   //*** change for radix 4
-  otfc4 otfc4(clk, DivStart, q, Quot);
+  otfc4 otfc4(.clk, .DivStart, .q, .Quot);
 
-  expcalc expcalc(.XExpE, .YExpE, .DivCalcExp);
+  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
 
   divcounter divcounter(clk, DivStart, DivDone);
 
@@ -224,39 +226,42 @@ endmodule
 ///////////////////
 module srtpreproc (
   input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic [`NF-1:0] XFrac, YFrac,
+  input  logic [`NF:0] XManE, YManE,
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN-1:0] X, D,
+  output logic [`DIVLEN:0] X,
+  output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
   output logic       intSign // Quotient integer sign
 );
 
-  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
-  logic  [`XLEN-1:0] PosA, PosB;
-  logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  // logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
+  // logic  [`XLEN-1:0] PosA, PosB;
+  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  logic  [`DIVLEN:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
 
-  assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
-  assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
+  // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
+  // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
 
-  lzc #(`XLEN) lzcA (PosA, zeroCntA);
-  lzc #(`XLEN) lzcB (PosB, zeroCntB);
+  // lzc #(`XLEN) lzcA (PosA, zeroCntA);
+  // lzc #(`XLEN) lzcB (PosB, zeroCntB);
 
-  assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
-  assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
+  // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
+  // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
-  assign PreprocA = ExtraA << zeroCntA;
-  assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
+  // assign PreprocA = ExtraA << zeroCntA;
+  // assign PreprocB = ExtraB << (zeroCntB + 1);
+  assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
 
   
   assign X = Int ? PreprocA : PreprocX;
-  assign D = Int ? PreprocB : PreprocY;
-  assign intExp = zeroCntB - zeroCntA + 1;
-  assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
+  assign Dpreproc = Int ? PreprocB : PreprocY;
+  // assign intExp = zeroCntB - zeroCntA + 1;
+  // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 endmodule
 
 ///////////////////////////////////
@@ -266,7 +271,7 @@ module otfc4 (
   input  logic         clk,
   input  logic         DivStart,
   input  logic [3:0]   q,
-  output logic [`DIVLEN-1:0] Quot
+  output logic [`DIVLEN+2:0] Quot
 );
 
   //  The on-the-fly converter transfers the quotient 
@@ -278,7 +283,7 @@ module otfc4 (
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
+  logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
@@ -286,7 +291,7 @@ module otfc4 (
   // if starting a new divison set Q to 0 and QM to -1
   mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
   mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(`DIVLEN+3) Qreg(clk, QMux, Q);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   // shift Q (quotent) and QM (quotent-1)
@@ -298,7 +303,7 @@ module otfc4 (
     // *** how does the 0 concatination numbers work?
 
   always_comb begin
-    QR  = Q[`DIVLEN:0];
+    QR  = Quot[`DIVLEN:0];
     QMR = QM[`DIVLEN:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
@@ -318,7 +323,7 @@ module otfc4 (
     end 
   end
   // Quot is in the range [.5, 2) so normalize the result if nesissary
-  assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
+  // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
 
 endmodule
 
@@ -352,9 +357,10 @@ endmodule
 //////////////
 module expcalc(
   input logic  [`NE-1:0] XExpE, YExpE,
+  input logic XZeroE,
   output logic [`NE:0] DivCalcExp
 );
 
-  assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS);
+  assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
 
 endmodule
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 7a5514901..e8afb299b 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -53,6 +53,7 @@ module testbenchfp;
   logic CvtResSgnE;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
+	logic [`DIVLEN+2:0] Quot;
   logic CvtResDenormUfE;
   logic DivStart, DivDone;
   
@@ -69,7 +70,6 @@ module testbenchfp;
   logic 			          ZSgnEffE;
   logic 			          PSgnE;
   logic       DivSgn;
-  logic [`DIVLEN-1:0] Quot;
   logic [`NE:0] DivCalcExp;
 
 
@@ -659,8 +659,8 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp),
-                .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
+                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
                 .DivDone, .Quot, .Rem());
                 
   assign CmpFlg[3:0] = 0;

From d17596353b744db0b2025051d4ef5517b1f7491f Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 22:37:44 +0000
Subject: [PATCH 4/6] lint warning fix

---
 pipelined/src/fpu/fpu.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index da46d73e5..ff83079a8 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -123,7 +123,7 @@ module fpu (
    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`DIVLEN-1:0] Quot;
+   logic [`DIVLEN+2:0] Quot;
    logic [`NE:0] DivCalcExpM;
 
    // result and flag signals

From 97ded2cdd96e8e09083a6995cf29cc72dad02b4c Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 22:59:43 +0000
Subject: [PATCH 5/6] div debug - accounted for 1 bit normalization in exponent
 calculation

---
 pipelined/regression/wave-fpu.do | 1 +
 pipelined/src/fpu/postprocess.sv | 5 +++--
 pipelined/src/fpu/round.sv       | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 60835ef67..a58400cca 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -23,5 +23,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 9138f9dfd..d970fdbce 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -94,6 +94,7 @@ module postprocess(
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
     logic [`NE+1:0] RoundExp;
+    logic [`NE:0] CorrDivExp;
     logic [1:0] NegResMSBS;
     logic CvtOp;
     logic FmaOp;
@@ -138,7 +139,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Quot, .DivShiftAmt);
+    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
 
     always_comb
         case(PostProcSelM)
@@ -175,7 +176,7 @@ module postprocess(
     // round to infinity
     // round to nearest max magnitude
 
-    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM,
+    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
                 .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
                 .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
 
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 1fd471e9d..73395caed 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -24,7 +24,7 @@ module round(
     input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
     input logic                 RoundSgn,      // the result's sign
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic [`NE:0]           DivCalcExpM,    // the calculated expoent
+    input logic [`NE:0]           CorrDivExp,    // the calculated expoent
     output logic                UfPlus1,  // do you add or subtract on from the result
     output logic [`NE+1:0]      FullResExp,      // ResExp with bits to determine sign and overflow
     output logic [`NF-1:0]      ResFrac,         // Result fraction
@@ -305,7 +305,7 @@ module round(
         case(PostProcSelM)
             2'b10: RoundExp = SumExp; // fma
             2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
-            2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide
+            2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
             default: RoundExp = 0; 
         endcase
 

From 86cdbd90e67176690fb39dcbc2bc1b8b08ad494f Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 23:01:30 +0000
Subject: [PATCH 6/6] forgot a file

---
 pipelined/src/fpu/divshiftcalc.sv | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 pipelined/src/fpu/divshiftcalc.sv

diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
new file mode 100644
index 000000000..57022e5ae
--- /dev/null
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -0,0 +1,15 @@
+`include "wally-config.vh"
+
+module divshiftcalc( // derives the divide shift amount and the corrected divide exponent from the top bit of Quot
+    input logic  [`DIVLEN+2:0] Quot, // quotient; only its most significant bit is examined in this module
+    input logic  [`NE:0] DivCalcExpM, // exponent before the correction below
+    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, // 1 when the top bit of Quot is clear, otherwise 0
+    output logic [`NE:0] CorrDivExp // DivCalcExpM, decremented by 1 when the top bit of Quot is clear
+);
+    
+    assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]}; // zero-extend the inverted top bit to the shift-amount width
+    // the quotient is in the range [.5,2)
+    // if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift; NOTE(review): no denormal check is made in this module
+    assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]}; // subtrahend is the inverted top bit zero-extended to NE+1 bits
+
+endmodule