Testfloat running division - not passing

2025-02-11 06:05:49 +00:00 · 2022-06-23 00:07:34 +00:00 · 2022-06-23 00:07:34 +00:00 · 1612daa294
commit 1612daa294
parent 48c65db35c
13 changed files with 1173 additions and 139 deletions
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -94,11 +94,12 @@
 `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
 // largest length in IEU/FPU
-`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
+`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
 `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
-`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
+`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
-`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
+`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
-`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
+`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
 `define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
 // Disable spurious Verilator warnings
--- a/pipelined/regression/testfloat.do
+++ b/pipelined/regression/testfloat.do
@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv  ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 
 vsim -voptargs=+acc work.testbenchfp -G TEST=$2
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -7,3 +7,18 @@ add wave -noupdate /testbenchfp/Y
 add wave -noupdate /testbenchfp/Z
 add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/DivDone
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@ -7,10 +7,10 @@ module cvtshiftcalc(
    input logic  [`NE:0]           CvtCalcExpM,    // the calculated expoent
    input logic  [`NF:0]           XManM,          // input mantissas
    input logic     [`FMTBITS-1:0]  OutFmt,       // output format
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
    input logic CvtResDenormUfM,
    output logic CvtResUf,
-    output logic [`LGLEN+`NF:0]    CvtShiftIn    // number to be shifted
+    output logic [`CVTLEN+`NF:0]    CvtShiftIn    // number to be shifted
 );
    logic [$clog2(`NF):0]	ResNegNF;   // the result's fraction length negated (-NF)
@ -31,8 +31,8 @@ module cvtshiftcalc(
    //              |  `NF-1  zeros   |     Mantissa      | 0's if nessisary | 
    //          - otherwise:
    //              |     LzcInM      | 0's if nessisary | 
-    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
-                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : 
                                   {CvtLzcInM, {`NF+1{1'b0}}};
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -12,11 +12,11 @@ module fcvt (
    input logic             XDenormE,   // is the input denormalized
    input logic [`FMTBITS-1:0] FmtE,        // the input's precision (11=quad 01=double 00=single 10=half)
    output logic [`NE:0]           CvtCalcExpE,    // the calculated expoent
-	output logic [`LOGLGLEN-1:0] CvtShiftAmtE,  // how much to shift by
+	output logic [`LOGCVTLEN-1:0] CvtShiftAmtE,  // how much to shift by
    output logic                   CvtResDenormUfE,// does the result underflow or is denormalized
    output logic                   CvtResSgnE,     // the result's sign
    output logic                   IntZeroE,      // is the integer zero?
-    output logic [`LGLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
+    output logic [`CVTLEN-1:0]      CvtLzcInE      // input to the Leading Zero Counter (priority encoder)
    );
    // OpCtrls:
@ -43,7 +43,7 @@ module fcvt (
    logic                   Int64;      // is the integer 64 bits?
    logic                   IntToFp;       // is the opperation an int->fp conversion?
    logic                   ToInt;      // is the opperation an fp->int conversion?
-    logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
+    logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC
    // seperate OpCtrl for code readability
@ -78,10 +78,10 @@ module fcvt (
    // choose the input to the leading zero counter i.e. priority encoder
    //             int -> fp : | positive integer | 00000... (if needed) | 
    //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
+    assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
-                             {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
+                             {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
-    lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
+    lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
    ///////////////////////////////////////////////////////////////////////////
    // shifter
@ -99,9 +99,9 @@ module fcvt (
    //              - only shift fp -> fp if the initial value is denormalized
    //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
+    assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} :
-                    CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : 
+                    CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : 
-                              (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
+                              (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}};
    ///////////////////////////////////////////////////////////////////////////
    // exp calculations
@ -180,7 +180,7 @@ module fcvt (
    //                  - shift left to normalize (-1-ZeroCnt)
    //                  - newBias to make the biased exponent
    //          
-    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
+    assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})};
    // find if the result is denormal or underflows
    //      - if Calculated exponent is 0 or negative (and the input/result is not exactly 0)
    //      - can't underflow an integer to Fp conversion
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -82,7 +82,7 @@ module fpu (
   // unpacking signals
   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM;                       // input's sign - memory stage
+   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
@ -116,11 +116,11 @@ module fpu (
   // Cvt Signals
    logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
-	 logic [`LOGLGLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+	 logic [`LOGCVTLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
    logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
    logic                   IntZeroE, IntZeroM;      // is the integer zero?
-    logic [`LGLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+    logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
   // result and flag signals
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@ -317,7 +317,7 @@ module fpu (
   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
+   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
   flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
   flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
@ -333,7 +333,7 @@ module fpu (
   flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
                           {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
                           {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
-   flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
+   flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@ -351,7 +351,7 @@ module fpu (
   assign FpLoadM = FResSelM[1];
-   postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, 
                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, 
                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, 
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -30,7 +30,7 @@
 `include "wally-config.vh"
 module postprocess(
-    input logic                             XSgnM,  // input signs
+    input logic                             XSgnM, YSgnM,  // input signs
    input logic     [`NE-1:0]               ZExpM, // input exponents
    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
    input logic     [2:0]                   FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
@ -52,12 +52,13 @@ module postprocess(
    input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
    input logic CvtResDenormUfM,
-	input logic [`LOGLGLEN-1:0] CvtShiftAmtM,  // how much to shift by
+	input logic [`LOGCVTLEN-1:0] CvtShiftAmtM,  // how much to shift by
    input logic                   CvtResSgnM,     // the result's sign
    input logic             FWriteIntM,     // is fp->int (since it's writting to the integer register)
-    input logic  [`LGLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
+    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
    input logic             IntZeroM,         // is the input zero
    input logic [1:0] PostProcSelM, // select result to be written to fp register
    input logic [`DIVLEN-1:0]   Quot,
    output logic    [`FLEN-1:0]    PostProcResM,    // FMA final result
    output logic    [4:0]          PostProcFlgM,
    output logic [`XLEN-1:0] FCvtIntResM    // the int conversion result
@ -75,7 +76,7 @@ module postprocess(
    logic [3*`NF+8:0]            FmaShiftIn;        // is the sum zero
    logic               UfPlus1;                    // do you add one (for determining underflow flag)
    logic               Round;   // bits needed to determine rounding
-    logic [`LGLEN+`NF:0]    CvtShiftIn;    // number to be shifted
+    logic [`CVTLEN+`NF:0]    CvtShiftIn;    // number to be shifted
    logic               Mult;       // multiply opperation
    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
    logic [`NE+1:0]     ConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
@ -143,12 +144,12 @@ module postprocess(
                ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
            end
            2'b00: begin // cvt
-                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
+                ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM};
-                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
+                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
            end
-            2'b01: begin //div
+            2'b01: begin //div ***prob can take out
-                ShiftAmt = 0;//{DivShiftAmt};
+                ShiftAmt = 1'b0;//{DivShiftAmt};
-                ShiftIn =  0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
+                ShiftIn =  {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}};
            end
            default: begin 
                ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
@ -181,7 +182,7 @@ module postprocess(
    resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
                          .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, 
-                          .CvtResSgnM, .RoundSgn, .ResSgn);
+                          .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn);
    ///////////////////////////////////////////////////////////////////////////////
    // Flags
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@ -4,6 +4,8 @@ module resultsign(
    input logic [2:0]   FrmM,
    input logic         PSgnM, ZSgnEffM,
    input logic         InvZM,
    input logic         XSgnM,
    input logic         YSgnM,
    input logic         ZInfM,
    input logic         InfIn,
    input logic         NegSumM,
@ -25,6 +27,7 @@ module resultsign(
    logic FmaResSgn;
    logic FmaResSgnTmp;
    logic Underflow;
    logic DivSgn;
    // logic ResultSgnTmp;
    // Determine the sign if the sum is zero
@ -43,9 +46,10 @@ module resultsign(
    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
    assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
-    // Sign for rounding calulation
+    assign DivSgn = XSgnM^YSgnM;
    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
-    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
+    // Sign for rounding calculation
    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
    assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
 endmodule
--- a/pipelined/srt/qsel4.dat
+++ b/pipelined/srt/qsel4.dat
--- a/pipelined/srt/qsel4.sv
+++ b/pipelined/srt/qsel4.sv
@ -11,7 +11,7 @@ module qsel4 (
 	logic [2:0] Dmsbs;
 	assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
 	assign Wmsbs = PreWmsbs[7:1];
-	assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
+        assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
 	// D = 0001.xxx...
 	// Dmsbs = |   |
    // W =      xxxx.xxx...
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@ -30,12 +30,9 @@
 `include "wally-config.vh"
 `define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
 module srtradix4 (
  input  logic clk,
  input  logic DivStart, 
  input  logic       XSgnE, YSgnE,
  input  logic [`NE-1:0] XExpE, YExpE,
  input  logic [`NF-1:0] XFrac, YFrac,
  input  logic [`XLEN-1:0] SrcA, SrcB,
@ -44,8 +41,8 @@ module srtradix4 (
  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic       DivDone,
-  output logic       DivSgn,
+  output logic [`DIVLEN-1:0] Quot,
-  output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
+  output logic [`XLEN-1:0] Rem, // *** later handle integers
  output logic [`NE-1:0] DivExp
 );
@ -91,7 +88,6 @@ module srtradix4 (
  // Store the exponent and sign until division is DivDone
  flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp);
  flopen #(1) signflop(clk, DivStart, calcSign, DivSgn);
  // Divisor Selection logic
  // *** radix 4 change to choose -2 to 2
@ -115,13 +111,11 @@ module srtradix4 (
  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
  //*** change for radix 4
-  otfc4  #(`DIVLEN) otfc4(clk, DivStart, q, Quot);
+  otfc4 otfc4(clk, DivStart, q, Quot);
  expcalc expcalc(.XExpE, .YExpE, .DivCalcExp);
-  signcalc signcalc(.XSgnE, .YSgnE, .calcSign);
+  divcounter divcounter(clk, DivStart, DivDone);
  counter counter(clk, DivStart, DivDone);
 endmodule
@ -132,7 +126,7 @@ endmodule
 /////////////
 // counter //
 /////////////
-module counter(input  logic clk, 
+module divcounter(input  logic clk, 
               input  logic DivStart, 
               output logic DivDone);
@ -146,6 +140,7 @@ module counter(input  logic clk,
  always @(posedge clk)
    begin
      DivDone = 0;
      if      (count == `DIVLEN/2+1) DivDone <= #1 1;
      else if (DivDone | DivStart) DivDone <= #1 0;	
      if (DivStart) count <= #1 0;
@ -170,7 +165,7 @@ module qsel4 (
 	// Wmsbs = |        |
 	logic [3:0] QSel4[1023:0];
-	initial $readmemh("qslc_r4a2b.tv", QSel4);
+	initial $readmemh("../srt/qsel4.dat", QSel4);
 	assign q = QSel4[{Dmsbs,Wmsbs}];
 endmodule
@ -218,11 +213,11 @@ endmodule
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 4 //
 ///////////////////////////////////
-module otfc4 #(parameter N=65) (
+module otfc4 (
  input  logic         clk,
  input  logic         DivStart,
  input  logic [3:0]   q,
-  output logic [N-1:0] r
+  output logic [`DIVLEN-1:0] Quot
 );
  //  The on-the-fly converter transfers the quotient 
@ -230,20 +225,20 @@ module otfc4 #(parameter N=65) (
  //
  //  This code follows the pseudocode presented in the 
  //  floating point chapter of the book. Right now, 
-  //  it is written for Radix-2 division.
+  //  it is written for Radix-4 division.
  //
  //  QM is Q-1. It allows us to write negative bits 
  //  without using a costly CPA. 
-  logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
+  logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
  //  QR and QMR are the shifted versions of Q and QM.
  //  They are treated as [N-1:r] size signals, and 
  //  discard the r most significant bits of Q and QM. 
-  logic [N:0] QR, QMR;
+  logic [`DIVLEN:0] QR, QMR;
  // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux);
+  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
-  mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux);
+  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(N+3) Qreg(clk, QMux, Q);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Q);
-  flop #(N+3) QMreg(clk, QMMux, QM);
+  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
  // shift Q (quotient) and QM (quotient-1)
 		// if 	q = 2  	    Q = {Q, 10} 	QM = {Q, 01}		
@ -253,11 +248,9 @@ module otfc4 #(parameter N=65) (
 		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
    // *** how do the 0 concatenation numbers work?
  always_comb begin
-    QR  = Q[N:0];
+    QR  = Q[`DIVLEN:0];
-    QMR = QM[N:0];     // Shift Q and QM
+    QMR = QM[`DIVLEN:0];     // Shift Q and QM
    if (q[3]) begin // +2
      QNext  = {QR,  2'b10};
      QMNext = {QR,  2'b01};
@ -275,7 +268,8 @@ module otfc4 #(parameter N=65) (
      QMNext = {QMR, 2'b11};
    end 
  end
-  assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
+  // Quot is in the range [.5, 2) so normalize the result if necessary
  assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
 endmodule
@ -315,15 +309,3 @@ module expcalc(
  assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS);
 endmodule
 //////////////
 // signcalc //
 //////////////
 module signcalc(
  input logic  XSgnE, YSgnE,
  output logic calcSign
 );
  assign calcSign = XSgnE ^ YSgnE;
 endmodule
--- a/pipelined/srt/testbench-radix4.sv
+++ b/pipelined/srt/testbench-radix4.sv
@ -50,7 +50,7 @@ module testbenchradix4;
                .XExpE(aExp), .YExpE(bExp), .DivExp,
                .XSgnE(asign), .YSgnE(bsign), .DivSgn,
                .XFrac(afrac), .YFrac(bfrac), 
-                .SrcA('0), .SrcB('0), .Fmt(2'b00), 
+                .SrcA('0), .SrcB('0),
                .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
                .Quot, .Rem());
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -48,13 +48,13 @@ module testbenchfp;
  logic                 XInf, YInf, ZInf;                   // is the input infinity
  logic                 XZero, YZero, ZZero;                // is the input zero
  logic                 XExpMax, YExpMax, ZExpMax;         // is the input's exponent all ones  
-  logic  [`LGLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
+  logic  [`CVTLEN-1:0]      CvtLzcInE;      // input to the Leading Zero Counter (priority encoder)
  logic        IntZeroE;
  logic CvtResSgnE;
  logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
  logic [`NE:0]           CvtCalcExpE;    // the calculated expoent
-	logic [`LOGLGLEN-1:0] CvtShiftAmtE;  // how much to shift by
+	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
  logic CvtResDenormUfE;
  logic DivStart, DivDone;
  // in-between FMA signals
@ -68,6 +68,9 @@ module testbenchfp;
  logic 			          NegSumE;
  logic 			          ZSgnEffE;
  logic 			          PSgnE;
  logic       DivSgn;
  logic [`DIVLEN-1:0] Quot;
  logic [`NE-1:0] DivExp;
  ///////////////////////////////////////////////////////////////////////////////////////////////
@ -205,16 +208,16 @@ module testbenchfp;
            Fmt = {Fmt, 2'b11};
          end
      end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the divide tests/op-ctrls/unit/fmt
+        // add the divide tests/op-ctrls/unit/fmt
-      //   Tests = {Tests, f128div};
+        Tests = {Tests, f128div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
+        WriteInt = {WriteInt, 1'b0};
-      //     for(int i = 0; i<5; i++) begin
+          for(int i = 0; i<5; i++) begin
-      //       Unit = {Unit, `DIVUNIT};
+            Unit = {Unit, `DIVUNIT};
-      //       Fmt = {Fmt, 2'b11};
+            Fmt = {Fmt, 2'b11};
-      //     end
+          end
-      // end
+      end
      // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tested
      //   // add the square-root tests/op-ctrls/unit/fmt
      //   Tests = {Tests, f128sqrt};
@ -332,16 +335,16 @@ module testbenchfp;
          Fmt = {Fmt, 2'b01};
        end
      end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
+        // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f64div};
+        Tests = {Tests, f64div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
+        WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
+        for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
+          Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b01};
+          Fmt = {Fmt, 2'b01};
-      //   end
+        end
-      // end
+      end
      // if (TEST === "sqrt"  | TEST === "all") begin // if square-root is being tessted
      //   // add the correct tests/op-ctrls/unit/fmt to their lists
      //   Tests = {Tests, f64sqrt};
@ -443,16 +446,16 @@ module testbenchfp;
          Fmt = {Fmt, 2'b00};
        end
      end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
+        // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f32div};
+        Tests = {Tests, f32div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
+        WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
+        for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
+          Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b00};
+          Fmt = {Fmt, 2'b00};
-      //   end
+        end
-      // end
+      end
      // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
      //   // add the correct tests/op-ctrls/unit/fmt to their lists
      //   Tests = {Tests, f32sqrt};
@ -536,16 +539,16 @@ module testbenchfp;
          Fmt = {Fmt, 2'b10};
        end
      end
-      // if (TEST === "div"   | TEST === "all") begin // if division is being tested
+      if (TEST === "div"   | TEST === "all") begin // if division is being tested
-      //   // add the correct tests/op-ctrls/unit/fmt to their lists
+        // add the correct tests/op-ctrls/unit/fmt to their lists
-      //   Tests = {Tests, f16div};
+        Tests = {Tests, f16div};
-      //   OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
-      //   WriteInt = {WriteInt, 1'b0};
+        WriteInt = {WriteInt, 1'b0};
-      //   for(int i = 0; i<5; i++) begin
+        for(int i = 0; i<5; i++) begin
-      //     Unit = {Unit, `DIVUNIT};
+          Unit = {Unit, `DIVUNIT};
-      //     Fmt = {Fmt, 2'b10};
+          Fmt = {Fmt, 2'b10};
-      //   end
+        end
-      // end
+      end
      // if (TEST === "sqrt"  | TEST === "all") begin // if sqrt is being tested
      //   // add the correct tests/op-ctrls/unit/fmt to their lists
      //   Tests = {Tests, f16sqrt};
@ -611,7 +614,7 @@ module testbenchfp;
  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
                                    .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
-                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan),
+                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
                                    .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
                                    .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
                                    .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
@ -639,8 +642,8 @@ module testbenchfp;
              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
              .ProdExpE, .AddendStickyE, .KillProdE); 
-  postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
+  postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
+              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot,
              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
@ -650,20 +653,15 @@ module testbenchfp;
              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
              .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
-fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
+  fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
            .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
            .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), 
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp,
-  //                 .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
+                .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
-  //                 .CvtRes, .CvtFlgE);
+                .DivDone, .Quot, .Rem());
  // *** integrate divide and square root
  //  fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), 
  //        .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
  //        .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
  //        .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg));
  assign CmpFlg[3:0] = 0;
@ -817,7 +815,7 @@ end
  ///////////////////////////////////////////////////////////////////////////////////////////////
    // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
      errors += 1;
      $display("There is an error in %s", Tests[TestNum]);
      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -840,8 +838,7 @@ end
      $stop;
    end
-
+    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
    VectorNum += 1; // increment the vector
    if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
@ -895,15 +892,17 @@ module readvectors (
  output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
  output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
  output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic XExpMaxE,
+  output logic                    XExpMaxE,
  output logic                    DivStart,
  output logic [`FLEN-1:0] X, Y, Z
 );
  // apply test vectors on rising edge of clk
  // Format of vectors Inputs(1/2/3)_AnsFlg
-  always @(posedge clk) begin
+  always @(TestNum) begin
    #1; 
    AnsFlg = TestVector[4:0];
    DivStart = 1'b0;
    case (Unit)
      `FMAUNIT:
        case (Fmt)
@ -972,21 +971,29 @@ module readvectors (
            X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
            Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
            Ans = TestVector[8+(`Q_LEN-1):8];
            DivStart = 1'b1; #10 // one clk cycle
            DivStart = 1'b0;
          end
          2'b01:	begin	  // double
            X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
            Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
            Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            DivStart = 1'b1; #10
            DivStart = 1'b0;
          end
          2'b00:	begin	  // single
            X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
            Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
            Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
            DivStart = 1'b1; #10
            DivStart = 1'b0;
          end
          2'b10:	begin	  // half
            X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
            Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
            Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
            DivStart = 1'b1; #10
            DivStart = 1'b0;
          end
        endcase
      `CMPUNIT: