diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index a58400cca..7dfec7e24 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv
index bee4d09c2..d867efc44 100644
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@@ -3,6 +3,8 @@
 module divshiftcalc(
     input logic  [`DIVLEN+2:0] Quot,
     input logic  [`NE+1:0] DivCalcExpM,
+    input logic  [`FMTBITS-1:0] FmtM,
+    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
     output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
     output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
     output logic [`NE+1:0] CorrDivExp
@@ -10,30 +12,81 @@ module divshiftcalc(
     logic ResDenorm;
     logic [`NE+1:0] DenormShift;
     logic [`NE+1:0] NormShift;
+    logic [`NE+1:0] Nf, NfPlus1;
 
     // is the result denromalized
     // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
     assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
+    // select the fraction length (Nf) and Nf+1 that match the format in FmtM
+    if (`FPSIZES == 1) begin // single supported format: both values are constants
+        assign Nf = (`NE+2)'(`NF);
+        assign NfPlus1 = (`NE+2)'(`NF+1);
+
+    end else if (`FPSIZES == 2) begin // two formats: FmtM set picks `NF, clear picks `NF1
+        assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
+        assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
+
+    end else if (`FPSIZES == 3) begin // three formats: decode FmtM against `FMT/`FMT1/`FMT2
+        always_comb
+            case (FmtM)
+                `FMT: begin
+                    Nf = (`NE+2)'(`NF);
+                    NfPlus1 = (`NE+2)'(`NF+1);
+                end
+                `FMT1: begin
+                    Nf = (`NE+2)'(`NF1);
+                    NfPlus1 = (`NE+2)'(`NF1+1);
+                end
+                `FMT2: begin
+                    Nf = (`NE+2)'(`NF2);
+                    NfPlus1 = (`NE+2)'(`NF2+1);
+                end
+                default: begin // unsupported encoding: drive every bit to x
+                    Nf = 'x;      // 'x fills the full width (1'bx would zero-extend and leave only bit 0 unknown)
+                    NfPlus1 = 'x;
+                end
+            endcase
+    end else if (`FPSIZES == 4) begin // four formats: all 2-bit encodings of FmtM are used
+        always_comb
+            case (FmtM)
+                2'h3: begin // uses the `Q_NF fraction length
+                    Nf = (`NE+2)'(`Q_NF);
+                    NfPlus1 = (`NE+2)'(`Q_NF+1);
+                end
+                2'h1: begin // uses the `D_NF fraction length
+                    Nf = (`NE+2)'(`D_NF);
+                    NfPlus1 = (`NE+2)'(`D_NF+1);
+                end
+                2'h0: begin // uses the `S_NF fraction length
+                    Nf = (`NE+2)'(`S_NF);
+                    NfPlus1 = (`NE+2)'(`S_NF+1);
+                end
+                2'h2: begin // uses the `H_NF fraction length
+                    Nf = (`NE+2)'(`H_NF);
+                    NfPlus1 = (`NE+2)'(`H_NF+1);
+                end
+            endcase
+    end
     // if the result is denormalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
     //  .000xxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = 0
     //  .0000xxxxxxxxxxx... >> 1                Exp = 1
     // Left shift amount  = DivCalcExp+NF+1-1
-    assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM;
-    // if the result is denormalized
+    assign DenormShift = Nf+DivCalcExpM;
+    // if the result is normalized
     //  00000000x.xxxxxx...                     Exp = DivCalcExp
     //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
     //  00000000x.xxxxxx... << NF+1             Exp = DivCalcExp
     //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
     // Left shift amount  = NF+1 plus 1 if normalization required
-    assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
+    assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
     // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
+    assign DivShiftAmt = (ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
 
     // *** may be able to reduce shifter size
     assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
-    // the quotent is in the range [.5,2)
+    // the quotient is in the range [.5,2) if there is no early termination
     // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
     assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
 
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 2f43b27d4..aba1a8f48 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -127,6 +127,7 @@ module fpu (
    logic [`NE+1:0] DivCalcExpM;
    logic DivNegStickyM;
    logic DivStickyM;
+   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
 
    // result and flag signals
    logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@@ -357,7 +358,7 @@ module fpu (
 
    assign FpLoadM = FResSelM[1];
 
-   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
                            .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
                            .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
                            .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 6d80f661d..217e3f586 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -49,6 +49,7 @@ module postprocess(
     input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
     input logic                             PSgnM,      // the product's sign
     input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
+    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
     input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
     input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
     input logic [`NE+1:0]           DivCalcExpM,    // the calculated expoent
@@ -143,7 +144,7 @@ module postprocess(
                               .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
     fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                           .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
 
     always_comb
         case(PostProcSelM)
diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv
index a0bf86d85..d6d15e46f 100644
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@@ -211,7 +211,7 @@ module resultselect(
     //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
     //      - dont set to zero if fp input is zero but not using the fp input
     //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1);
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
     assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
     // output infinity with result sign if divide by zero
     if(`IEEE754) begin
diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv
index a49838ace..179fbf45a 100644
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@@ -36,11 +36,14 @@ module srtradix4 (
   input  logic [`NE-1:0] XExpE, YExpE,
   input  logic [`NF:0] XManE, YManE,
   input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic XZeroE,
+  input  logic XInfE, YInfE, 
+  input  logic XZeroE, YZeroE, 
+  input  logic XNaNE, YNaNE, 
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
   output logic       DivDone,
   output logic       DivStickyE,
   output logic       DivNegStickyE,
@@ -49,10 +52,9 @@ module srtradix4 (
   output logic [`NE+1:0] DivCalcExpE
 );
 
-  // logic           qp, qz, qm; // quotient is +1, 0, or -1
   logic [3:0]     q;
   logic [`NE+1:0] DivCalcExp;
-  logic [`DIVLEN:0]    X;
+  logic [`DIVLEN-1:0]    X;
   logic [`DIVLEN-1:0]  Dpreproc;
   logic [`DIVLEN+3:0]  WS, WSA, WSN;
   logic [`DIVLEN+3:0]  WC, WCA, WCN;
@@ -68,13 +70,11 @@ module srtradix4 (
   // When start is asserted, the inputs are loaded into the divider.
   // Otherwise, the divisor is retained and the partial remainder
   // is fed back for the next iteration.
-  //  - assumed one is added here since all numbers are normlaized
-  //    *** wait what about zero? is that specal case? can the divider handle it?
   //  - when the start signal is asserted X and 0 are loaded into WS and WC
   //  - otherwise load WSA into the flipflop
-  //  *** what does N and A stand for?
-  //  *** change shift amount for radix4
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
+  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handled by result selection)
+  //  - ~XZeroE is used as X's assumed one so that a zero X does not create a sticky bit - all other numbers are normalized
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
   mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
@@ -117,12 +117,11 @@ module srtradix4 (
   
   //*** change for radix 4
   otfc4 otfc4(.clk, .DivStart, .q, .Quot);
-  assign DivStickyE = (WS+WC) != 0; //replace with early termination
-  assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination
 
   expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
 
-  divcounter divcounter(clk, DivStart, DivDone);
+  earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
+                  .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
 
 endmodule
 
@@ -130,28 +129,35 @@ endmodule
 // Submodules //
 ////////////////
 
-/////////////
-// counter //
-/////////////
-module divcounter(input  logic clk, 
-               input  logic DivStart, 
-               output logic DivDone);
+module earlytermination(                 // iteration counter plus done/sticky flags for the SRT divider
+  input  logic clk,                      // clocks the iteration counter
+  input  logic [`DIVLEN+3:0] WS, WC,     // residual held as two words; their sum WS+WC is what gets tested
+  input  logic XInfE, YInfE,             // special-case flag: either one forces WZero
+  input  logic XZeroE, YZeroE,           // special-case flag: either one forces WZero
+  input  logic XNaNE, YNaNE,             // special-case flag: either one forces WZero
+  input  logic DivStart,                 // reloads Count with `DIVLEN/2+2
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, // current value of Count
+  output logic DivStickyE,               // set while Count is zero
+  output logic DivNegStickyE,            // set while WS+WC, read as a signed value, is negative
+  output logic DivDone);                 // set while Count is zero or WZero is set
  
-   logic    [5:0]  count;
-
-  // This block of control logic sequences the divider
-  // through its iterations.  You may modify it if you
-  // build a divider which completes in fewer iterations.
-  // You are not responsible for the (trivial) circuit
-  // design of the block.
+   logic [$clog2(`DIVLEN/2+3)-1:0]  Count; // down counter, see the always block below
+   logic WZero;                            // residual is zero or an operand is a special case
 
+   assign WZero = (~|(WS+WC))|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary - the reduction keeps WS+WC at `DIVLEN+4 bits so the carry out is dropped (== 0 would widen the sum to 32 bits)
+   // *** rather than counting, should just be able to check if one of the two msbs of the quotient is 1 and then stop???
+  assign DivDone = (DivStickyE | WZero);
+  assign DivStickyE = ~|Count;
+  assign DivNegStickyE = $signed(WS+WC) < 0;
+  assign EarlyTermShiftDiv2E = Count;
+  // Count is loaded with `DIVLEN/2+2 when DivStart is asserted:
+  //   `DIVLEN/2 to get the required number of bits
+  //   +1 for setup and +1 for a possible .5 and the round bit
+  // on every other clock edge Count is decremented by one
   always @(posedge clk)
     begin
-      DivDone = 0;
-      if      (count == `DIVLEN/2+1) DivDone <= #1 1;
-      else if (DivDone | DivStart) DivDone <= #1 0;	
-      if (DivStart) count <= #1 0;
-      else     count <= #1 count+1;
+      if (DivStart) Count <= #1 `DIVLEN/2+2; // start of a new operation
+      else     Count <= #1 Count-1;          // unconditional decrement: wraps past zero until the next DivStart
     end
 endmodule
 
@@ -237,7 +243,7 @@ module srtpreproc (
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN:0] X,
+  output logic [`DIVLEN-1:0] X,
   output logic [`DIVLEN-1:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
@@ -245,7 +251,7 @@ module srtpreproc (
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`DIVLEN:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
   logic  [`DIVLEN-1:0] PreprocB, PreprocY;
 
   // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
@@ -263,7 +269,7 @@ module srtpreproc (
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
   assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
 
   
@@ -300,7 +306,7 @@ module otfc4 (
   // if starting a new divison set Q to 0 and QM to -1
   mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
   mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
   flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
 
   // shift Q (quotent) and QM (quotent-1)
@@ -331,8 +337,7 @@ module otfc4 (
       QMNext = {QMR, 2'b11};
     end 
   end
-  // Quot is in the range [.5, 2) so normalize the result if nesissary
-  // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
+  // Final quotient is in the range [.5, 2)
 
 endmodule
 
@@ -371,7 +376,7 @@ module expcalc(
   output logic [`NE+1:0] DivCalcExp
 );
 
-  // correct exponent for denormal shifts
+  // correct exponent for denormalized input's normalization shifts
   assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
 
 endmodule
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index c80ffceb2..bbe045972 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -55,6 +55,7 @@ module testbenchfp;
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
 	logic [`DIVLEN+2:0] Quot;
   logic CvtResDenormUfE;
+  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
   logic DivStart, DivDone;
   
 
@@ -651,7 +652,7 @@ module testbenchfp;
               .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
               .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
               .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
-              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
               .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
   
   fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
@@ -660,9 +661,9 @@ module testbenchfp;
   fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .DivStickyE(DivSticky),
-                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
-                .DivNegStickyE(DivNegSticky), .DivDone, .Quot, .Rem());
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
+                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
                 
   assign CmpFlg[3:0] = 0;
 
@@ -815,8 +816,9 @@ end
 
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
-    // check if the non-fma test is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    // check if result is correct
+    //  - wait till the division result is done or one extra cycle for early termination (to simulate the EM pipeline stage)
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@@ -839,7 +841,7 @@ end
       $stop;
     end
 
-    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
+    if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
 
     if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file