From 1612daa2944b90bcda8f8af0799cb168aba4342e Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 23 Jun 2022 00:07:34 +0000
Subject: [PATCH] Testfloat running division - not passing

---
 pipelined/config/shared/wally-shared.vh |    9 +-
 pipelined/regression/testfloat.do       |    2 +-
 pipelined/regression/wave-fpu.do        |   15 +
 pipelined/src/fpu/cvtshiftcalc.sv       |    8 +-
 pipelined/src/fpu/fcvt.sv               |   20 +-
 pipelined/src/fpu/fpu.sv                |   12 +-
 pipelined/src/fpu/postprocess.sv        |   21 +-
 pipelined/src/fpu/resultsign.sv         |   10 +-
 pipelined/srt/qsel4.dat                 | 1024 +++++++++++++++++++++++
 pipelined/srt/qsel4.sv                  |    2 +-
 pipelined/srt/srt-radix4.sv             |   58 +-
 pipelined/srt/testbench-radix4.sv       |    2 +-
 pipelined/testbench/testbench-fp.sv     |  129 +--
 13 files changed, 1173 insertions(+), 139 deletions(-)
 create mode 100644 pipelined/srt/qsel4.dat

diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index afe822f46..3c2699da0 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -94,11 +94,12 @@
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
 
 // largest length in IEU/FPU
-`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
 `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
-`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
-`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
-`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
+`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
+`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
+`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
+`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
 
 // Disable spurious Verilator warnings
 
diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do
index 68c240c8a..db6948699 100644
--- a/pipelined/regression/testfloat.do
+++ b/pipelined/regression/testfloat.do
@@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv  ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 
 
 vsim -voptargs=+acc work.testbenchfp -G TEST=$2
 
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 61b35a51b..906eb2560 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -7,3 +7,18 @@ add wave -noupdate /testbenchfp/Y
 add wave -noupdate /testbenchfp/Z
 add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
+add wave -noupdate /testbenchfp/DivStart
+add wave -noupdate /testbenchfp/DivDone
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv
index 899dffb77..ab054342f 100644
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@@ -7,10 +7,10 @@ module cvtshiftcalc(
     input logic  [`NE:0]           CvtCalcExpM,    // the calculated expoent
     input logic  [`NF:0]           XManM,          // input mantissas
     input logic     [`FMTBITS-1:0]  OutFmt,       // output format
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
     input logic CvtResDenormUfM,
     output logic CvtResUf,
-    output logic [`LGLEN+`NF:0]    CvtShiftIn    // number to be shifted
+    output logic [`CVTLEN+`NF:0]    CvtShiftIn    // number to be shifted
 );
     logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
 
@@ -31,8 +31,8 @@ module cvtshiftcalc(
     //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | 
     //          - otherwise:
     //              |     LzcInM      | 0's if nessisary | 
-    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
-                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : 
                                    {CvtLzcInM, {`NF+1{1'b0}}};
     
     
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index a76122804..26ca7dd83 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -12,11 +12,11 @@ module fcvt (
     input logic             XDenormE,   // is the input denormalized
     input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
     output logic [`NE:0]           CvtCalcExpE,    // the calculated expoent
-	output logic [`LOGLGLEN-1:0] CvtShiftAmtE,  // how much to shift by
+	output logic [`LOGCVTLEN-1:0] CvtShiftAmtE,  // how much to shift by
     output logic                   CvtResDenormUfE,// does the result underflow or is denormalized
     output logic                   CvtResSgnE,     // the result's sign
     output logic                   IntZeroE,      // is the integer zero?
-    output logic [`LGLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
+    output logic [`CVTLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
     );
 
     // OpCtrls:
@@ -43,7 +43,7 @@ module fcvt (
     logic                   Int64;      // is the integer 64 bits?
     logic                   IntToFp;       // is the opperation an int->fp conversion?
     logic                   ToInt;      // is the opperation an fp->int conversion?
-    logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
+    logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC
 
 
     // seperate OpCtrl for code readability
@@ -78,10 +78,10 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
-                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
+    assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+                             {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
     
-    lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
+    lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
 
     ///////////////////////////////////////////////////////////////////////////
     // shifter
@@ -99,9 +99,9 @@ module fcvt (
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
-                    CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : 
-                              (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
+    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} :
+                    CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : 
+                              (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}};
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -180,7 +180,7 @@ module fcvt (
     //                  - shift left to normilize (-1-ZeroCnt)
     //                  - newBias to make the biased exponent
     //          
-    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
+    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})};
     // find if the result is dnormal or underflows
     //      - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
     //      - can't underflow an integer to Fp conversion
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index be73e9e7a..b8a2e1918 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -82,7 +82,7 @@ module fpu (
 
    // unpacking signals
    logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM;                       // input's sign - memory stage
+   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
    logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
    logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
    logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
@@ -116,11 +116,11 @@ module fpu (
 
    // Cvt Signals
     logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
-	 logic [`LOGLGLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+	 logic [`LOGCVTLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
     logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
     logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
     logic                   IntZeroE, IntZeroM;      // is the integer zero?
-    logic [`LGLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -317,7 +317,7 @@ module fpu (
 
    // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
    flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
+   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
    flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
    flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
    flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
@@ -333,7 +333,7 @@ module fpu (
    flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
                            {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
                            {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
-   flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+   flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                            {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                            {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
 
@@ -351,7 +351,7 @@ module fpu (
 
    assign FpLoadM = FResSelM[1];
 
-   postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
                            .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, 
                            .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, 
                            .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 267647346..c53920554 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -30,7 +30,7 @@
 `include "wally-config.vh"
 
 module postprocess(
-    input logic                             XSgnM,  // input signs
+    input logic                             XSgnM, YSgnM,  // input signs
     input logic     [`NE-1:0]               ZExpM, // input exponents
     input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
     input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
@@ -52,12 +52,13 @@ module postprocess(
     input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
     input logic CvtResDenormUfM,
-	input logic [`LOGLGLEN-1:0] CvtShiftAmtM,  // how much to shift by
+	input logic [`LOGCVTLEN-1:0] CvtShiftAmtM,  // how much to shift by
     input logic                   CvtResSgnM,     // the result's sign
     input logic             FWriteIntM,     // is fp->int (since it's writting to the integer register)
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
     input logic             IntZeroM,         // is the input zero
     input logic [1:0] PostProcSelM, // select result to be written to fp register
+    input logic [`DIVLEN-1:0]   Quot,
     output logic    [`FLEN-1:0]    PostProcResM,    // FMA final result
     output logic    [4:0]          PostProcFlgM,
     output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
@@ -75,7 +76,7 @@ module postprocess(
     logic [3*`NF+8:0]            FmaShiftIn;        // is the sum zero
     logic               UfPlus1;                    // do you add one (for determining underflow flag)
     logic               Round;   // bits needed to determine rounding
-    logic [`LGLEN+`NF:0]    CvtShiftIn;    // number to be shifted
+    logic [`CVTLEN+`NF:0]    CvtShiftIn;    // number to be shifted
     logic               Mult;       // multiply opperation
     logic [`FLEN:0]     RoundAdd;       // how much to add to the result
     logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
@@ -143,12 +144,12 @@ module postprocess(
                 ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
             end
             2'b00: begin // cvt
-                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
-                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM};
+                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
             end
-            2'b01: begin //div
-                ShiftAmt = 0;//{DivShiftAmt};
-                ShiftIn =  0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
+            2'b01: begin //div ***prob can take out
+                ShiftAmt = 1'b0;//{DivShiftAmt};
+                ShiftIn =  {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}};
             end
             default: begin 
                 ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
@@ -181,7 +182,7 @@ module postprocess(
 
     resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
                           .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, 
-                          .CvtResSgnM, .RoundSgn, .ResSgn);
+                          .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Flags
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index c8862ff94..9a76cf8f3 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -4,6 +4,8 @@ module resultsign(
     input logic [2:0]   FrmM,
     input logic         PSgnM, ZSgnEffM,
     input logic         InvZM,
+    input logic         XSgnM,
+    input logic         YSgnM,
     input logic         ZInfM,
     input logic         InfIn,
     input logic         NegSumM,
@@ -25,6 +27,7 @@ module resultsign(
     logic FmaResSgn;
     logic FmaResSgnTmp;
     logic Underflow;
+    logic DivSgn;
     // logic ResultSgnTmp;
 
     // Determine the sign if the sum is zero
@@ -43,9 +46,10 @@ module resultsign(
     assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
     assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
 
-    // Sign for rounding calulation
-    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
+    assign DivSgn = XSgnM^YSgnM;
 
-    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
+    // Sign for rounding calculation
+    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
+    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/srt/qsel4.dat b/pipelined/srt/qsel4.dat
new file mode 100644
index 000000000..b92d81e8e
--- /dev/null
+++ b/pipelined/srt/qsel4.dat
@@ -0,0 +1,1024 @@
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+8
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/pipelined/srt/qsel4.sv b/pipelined/srt/qsel4.sv
index 069f4268c..70b8b92d2 100644
--- a/pipelined/srt/qsel4.sv
+++ b/pipelined/srt/qsel4.sv
@@ -11,7 +11,7 @@ module qsel4 (
 	logic [2:0] Dmsbs;
 	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
 	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
+        assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
 	// D = 0001.xxx...
 	// Dmsbs = |   |
     // W =      xxxx.xxx...
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index 6c9cd0fa7..671c63500 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -30,12 +30,9 @@
 
 `include "wally-config.vh"
 
-`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
-
 module srtradix4 (
   input  logic clk,
   input  logic DivStart, 
-  input  logic       XSgnE, YSgnE,
   input  logic [`NE-1:0] XExpE, YExpE,
   input  logic [`NF-1:0] XFrac, YFrac,
   input  logic [`XLEN-1:0] SrcA, SrcB,
@@ -44,8 +41,8 @@ module srtradix4 (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       DivDone,
-  output logic       DivSgn,
-  output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
+  output logic [`DIVLEN-1:0] Quot,
+  output logic [`XLEN-1:0] Rem, // *** later handle integers
   output logic [`NE-1:0] DivExp
 );
 
@@ -91,7 +88,6 @@ module srtradix4 (
 
   // Store the expoenent and sign until division is DivDone
   flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp);
-  flopen #(1) signflop(clk, DivStart, calcSign, DivSgn);
 
   // Divisor Selection logic
   // *** radix 4 change to choose -2 to 2
@@ -115,13 +111,11 @@ module srtradix4 (
   csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
   
   //*** change for radix 4
-  otfc4  #(`DIVLEN) otfc4(clk, DivStart, q, Quot);
+  otfc4 otfc4(clk, DivStart, q, Quot);
 
   expcalc expcalc(.XExpE, .YExpE, .DivCalcExp);
 
-  signcalc signcalc(.XSgnE, .YSgnE, .calcSign);
-
-  counter counter(clk, DivStart, DivDone);
+  divcounter divcounter(clk, DivStart, DivDone);
 
 endmodule
 
@@ -132,7 +126,7 @@ endmodule
 /////////////
 // counter //
 /////////////
-module counter(input  logic clk, 
+module divcounter(input  logic clk, 
                input  logic DivStart, 
                output logic DivDone);
  
@@ -146,6 +140,7 @@ module counter(input  logic clk,
 
   always @(posedge clk)
     begin
+      DivDone <= #1 0; // nonblocking default (deasserted); the DivDone assignments below override it, so no blocking/nonblocking mix on one variable
       if      (count == `DIVLEN/2+1) DivDone <= #1 1;
       else if (DivDone | DivStart) DivDone <= #1 0;	
       if (DivStart) count <= #1 0;
@@ -170,7 +165,7 @@ module qsel4 (
 	// Wmsbs = |        |
 
 	logic [3:0] QSel4[1023:0];
-	initial $readmemh("qslc_r4a2b.tv", QSel4);
+	initial $readmemh("../srt/qsel4.dat", QSel4);
 	assign q = QSel4[{Dmsbs,Wmsbs}];
 	
 endmodule
@@ -218,11 +213,11 @@ endmodule
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
-module otfc4 #(parameter N=65) (
+module otfc4 (
   input  logic         clk,
   input  logic         DivStart,
   input  logic [3:0]   q,
-  output logic [N-1:0] r
+  output logic [`DIVLEN-1:0] Quot
 );
 
   //  The on-the-fly converter transfers the quotient 
@@ -230,20 +225,20 @@ module otfc4 #(parameter N=65) (
   //
   //  This code follows the psuedocode presented in the 
   //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-2 division.
+  //  it is written for Radix-4 division.
   //
   //  QM is Q-1. It allows us to write negative bits 
   //  without using a costly CPA. 
-  logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
+  logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
   //  QR and QMR are the shifted versions of Q and QM.
   //  They are treated as [N-1:r] size signals, and 
   //  discard the r most significant bits of Q and QM. 
-  logic [N:0] QR, QMR;
+  logic [`DIVLEN:0] QR, QMR;
   // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux);
-  mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux);
-  flop #(N+3) Qreg(clk, QMux, Q);
-  flop #(N+3) QMreg(clk, QMMux, QM);
+  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
+  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Q);
+  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   // shift Q (quotent) and QM (quotent-1)
 		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
@@ -253,11 +248,9 @@ module otfc4 #(parameter N=65) (
 		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
     // *** how does the 0 concatination numbers work?
 
-
-
   always_comb begin
-    QR  = Q[N:0];
-    QMR = QM[N:0];     // Shift Q and QM
+    QR  = Q[`DIVLEN:0];
+    QMR = QM[`DIVLEN:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
@@ -275,7 +268,8 @@ module otfc4 #(parameter N=65) (
       QMNext = {QMR, 2'b11};
     end 
   end
-  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+  // Quot is in the range [.5, 2) so normalize the result if necessary
+  assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
 
 endmodule
 
@@ -315,15 +309,3 @@ module expcalc(
   assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS);
 
 endmodule
-
-//////////////
-// signcalc //
-//////////////
-module signcalc(
-  input logic  XSgnE, YSgnE,
-  output logic calcSign
-);
-
-  assign calcSign = XSgnE ^ YSgnE;
-
-endmodule
\ No newline at end of file
diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv
index 0cea8059c..434ef74b0 100644
--- a/pipelined/srt/testbench-radix4.sv
+++ b/pipelined/srt/testbench-radix4.sv
@@ -50,7 +50,7 @@ module testbenchradix4;
                 .XExpE(aExp), .YExpE(bExp), .DivExp,
                 .XSgnE(asign), .YSgnE(bsign), .DivSgn,
                 .XFrac(afrac), .YFrac(bfrac), 
-                .SrcA('0), .SrcB('0), .Fmt(2'b00), 
+                .SrcA('0), .SrcB('0),
                 .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
                 .Quot, .Rem());
 
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 4bae7d106..748670b46 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -48,13 +48,13 @@ module testbenchfp;
   logic                 XInf, YInf, ZInf;                   // is the input infinity
   logic                 XZero, YZero, ZZero;                // is the input zero
   logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
-  logic  [`LGLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
+  logic  [`CVTLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
   logic        IntZeroE;
   logic CvtResSgnE;
-  logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
   logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
-	logic [`LOGLGLEN-1:0] CvtShiftAmtE;  // how much to shift by
+	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
   logic CvtResDenormUfE;
+  logic DivStart, DivDone;
   
 
   // in-between FMA signals
@@ -68,6 +68,9 @@ module testbenchfp;
   logic 			          NegSumE;
   logic 			          ZSgnEffE;
   logic 			          PSgnE;
+  logic       DivSgn;
+  logic [`DIVLEN-1:0] Quot;
+  logic [`NE-1:0] DivExp;
 
 
   ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -205,16 +208,16 @@ module testbenchfp;
             Fmt = {Fmt, 2'b11};
           end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the divide tests/op-ctrls/unit/fmt
-      //   Tests = {Tests, f128div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //     for(int i = 0; i<5; i++) begin
-      //       Unit = {Unit, `DIVUNIT};
-      //       Fmt = {Fmt, 2'b11};
-      //     end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the divide tests/op-ctrls/unit/fmt
+        Tests = {Tests, f128div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+          for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'b11};
+          end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tested
       //   // add the square-root tests/op-ctrls/unit/fmt
       //   Tests = {Tests, f128sqrt};
@@ -332,16 +335,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b01};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f64div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b01};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
+        // add the correct tests/op-ctrls/unit/fmt to their lists
+        Tests = {Tests, f64div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b01};
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tessted
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f64sqrt};
@@ -443,16 +446,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b00};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f32div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b00};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // division selected, either directly or through "all"
+        // queue the f32 divide vectors: one OpCtrl and one WriteInt entry here, Unit/Fmt entries in the loop below
+        Tests = {Tests, f32div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin // five Unit/Fmt entries; presumably one per file in f32div - TODO confirm
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b00}; // 2'b00 is the code readvectors decodes as single
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f32sqrt};
@@ -536,16 +539,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b10};
         end
       end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f16div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b10};
-      //   end
-      // end
+      if (TEST === "div"   | TEST === "all") begin // division selected, either directly or through "all"
+        // queue the f16 divide vectors: one OpCtrl and one WriteInt entry here, Unit/Fmt entries in the loop below
+        Tests = {Tests, f16div};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin // five Unit/Fmt entries; presumably one per file in f16div - TODO confirm
+          Unit = {Unit, `DIVUNIT};
+          Fmt = {Fmt, 2'b10}; // 2'b10 is the code readvectors decodes as half
+        end
+      end
       // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
       //   // add the correct tests/op-ctrls/unit/fmt to their lists
       //   Tests = {Tests, f16sqrt};
@@ -611,7 +614,7 @@ module testbenchfp;
   readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                     .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                     .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
-                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
+                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
                                     .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                     .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
                                     .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
@@ -639,8 +642,8 @@ module testbenchfp;
               .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
               .ProdExpE, .AddendStickyE, .KillProdE); 
               
-  postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
+  postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot,
               .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
               .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
               .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
@@ -650,21 +653,16 @@ module testbenchfp;
               .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
               .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
   
-fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
+  fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
             .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
             .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), 
-  //                 .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
-  //                 .CvtRes, .CvtFlgE);
-  // *** integrade divide and squareroot
-  //  fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), 
-  //        .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
-  //        .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
-  //        .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg));
-
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp, // radix-4 SRT unit; DivStart is the signal driven by readvectors
+                .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), // low `NF bits of X/Y mantissas; SrcA/SrcB/W64/Signed/Int tied to zero
+                .DivDone, .Quot, .Rem()); // Rem is left unconnected; Quot is also wired into postprocess
+                
   assign CmpFlg[3:0] = 0;
 
   // produce clock
@@ -817,7 +815,7 @@ end
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
     // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone|(UnitVal !== `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin // a DIVUNIT result is judged only once DivDone is set; other units are checked every time
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@@ -840,8 +838,7 @@ end
       $stop;
     end
 
-
-    VectorNum += 1; // increment the vector
+    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // advance to the next vector; a DIVUNIT vector is held until DivDone
 
     if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
 
@@ -895,15 +892,17 @@ module readvectors (
   output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
   output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
   output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XExpMaxE,
+  output logic                    XExpMaxE,
+  output logic                    DivStart,
   output logic [`FLEN-1:0] X, Y, Z
 );
 
   // apply test vectors on rising edge of clk
   // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(posedge clk) begin
+  always @(VectorNum or TestNum) begin // reload operands/answer whenever the vector index or the test changes, not only once per test
     #1; 
     AnsFlg = TestVector[4:0];
+    DivStart = 1'b0;
     case (Unit)
       `FMAUNIT:
         case (Fmt)
@@ -972,21 +971,29 @@ module readvectors (
             X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
             Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
             Ans = TestVector[8+(`Q_LEN-1):8];
+            DivStart = 1'b1; #10 // one clk cycle
+            DivStart = 1'b0;
           end
           2'b01:	begin	  // double
             X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
             Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
             Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
           2'b00:	begin	  // single
             X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
             Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
             Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
           2'b10:	begin	  // half
             X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
             Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
             Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
+            DivStart = 1'b1; #10
+            DivStart = 1'b0;
           end
         endcase
       `CMPUNIT: