radix-4 early termination working for special cases - not working completely

2025-02-11 06:05:49 +00:00 · 2022-06-27 20:43:55 +00:00 · 2022-06-27 20:43:55 +00:00 · f25bb4a384
commit f25bb4a384
parent 2d5d1f4e8f
7 changed files with 114 additions and 52 deletions
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -22,7 +22,7 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
+add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@ -3,6 +3,8 @@
 module divshiftcalc(
    input logic  [`DIVLEN+2:0] Quot,
    input logic  [`NE+1:0] DivCalcExpM,
    input logic  [`FMTBITS-1:0] FmtM,
    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
    output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
    output logic [`NE+1:0] CorrDivExp
@ -10,30 +12,81 @@ module divshiftcalc(
    logic ResDenorm;
    logic [`NE+1:0] DenormShift;
    logic [`NE+1:0] NormShift;
    logic [`NE+1:0] Nf, NfPlus1;
    // is the result denromalized
    // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
    assign ResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:1]&~(DivCalcExpM[0]&Quot[`DIVLEN+2]));
    // select the proper fraction lengnth
    if (`FPSIZES == 1) begin
        assign Nf = (`NE+2)'(`NF);
        assign NfPlus1 = (`NE+2)'(`NF+1);
    end else if (`FPSIZES == 2) begin
        assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
        assign NfPlus1 = FmtM ? (`NE+2)'(`NF+1) : (`NE+2)'(`NF1+1);
    end else if (`FPSIZES == 3) begin
        always_comb
            case (FmtM)
                `FMT: begin
                    Nf = (`NE+2)'(`NF);
                    NfPlus1 = (`NE+2)'(`NF+1);
                end
                `FMT1: begin
                    Nf = (`NE+2)'(`NF1);
                    NfPlus1 = (`NE+2)'(`NF1+1);
                end
                `FMT2: begin
                    Nf = (`NE+2)'(`NF2);
                    NfPlus1 = (`NE+2)'(`NF2+1);
                end
                default: begin
                    Nf = 1'bx;
                    NfPlus1 = 1'bx;
                end
            endcase
    end else if (`FPSIZES == 4) begin
        always_comb
            case (FmtM)
                2'h3: begin
                    Nf = (`NE+2)'(`Q_NF);
                    NfPlus1 = (`NE+2)'(`Q_NF+1);
                end
                2'h1: begin
                    Nf = (`NE+2)'(`D_NF);
                    NfPlus1 = (`NE+2)'(`D_NF+1);
                end
                2'h0: begin
                    Nf = (`NE+2)'(`S_NF);
                    NfPlus1 = (`NE+2)'(`S_NF+1);
                end
                2'h2: begin
                    Nf = (`NE+2)'(`H_NF);
                    NfPlus1 = (`NE+2)'(`H_NF+1);
                end
            endcase
    end
    // if the result is denormalized
    //  00000000x.xxxxxx...                     Exp = DivCalcExp
    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
    //  .000xxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = 0
    //  .0000xxxxxxxxxxx... >> 1                Exp = 1
    // Left shift amount  = DivCalcExp+NF+1-1
-    assign DenormShift = (`NE+2)'(`NF)+DivCalcExpM;
+    assign DenormShift = Nf+DivCalcExpM;
-    // if the result is denormalized
+    // if the result is normalized
    //  00000000x.xxxxxx...                     Exp = DivCalcExp
    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
    //  00000000x.xxxxxx... << NF+1             Exp = DivCalcExp
    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1
    // Left shift amount  = NF+1 plus 1 if normalization required
-    assign NormShift = (`NE+2)'(`NF+1) + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
+    assign NormShift = NfPlus1 + {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
    // if the shift amount is negitive then dont shift (keep sticky bit)
-    assign DivShiftAmt = ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0];
+    assign DivShiftAmt = (ResDenorm ?  DenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M, 1'b0};
    // *** may be able to reduce shifter size
    assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}};
-    // the quotent is in the range [.5,2)
+    // the quotent is in the range [.5,2) if there is no early termination
    // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
    assign CorrDivExp = (ResDenorm&~DenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~Quot[`DIVLEN+2]};
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -127,6 +127,7 @@ module fpu (
   logic [`NE+1:0] DivCalcExpM;
   logic DivNegStickyM;
   logic DivStickyM;
   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
   // result and flag signals
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@ -357,7 +358,7 @@ module fpu (
   assign FpLoadM = FResSelM[1];
-   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, 
+   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
                           .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -49,6 +49,7 @@ module postprocess(
    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
    input logic                             PSgnM,      // the product's sign
    input logic [2:0]                       FOpCtrlM,       // choose which opperation (look below for values)
    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
    input logic     [$clog2(3*`NF+7)-1:0]   FmaNormCntM,   // the normalization shift count
    input logic [`NE:0]           CvtCalcExpM,    // the calculated expoent
    input logic [`NE+1:0]           DivCalcExpM,    // the calculated expoent
@ -143,7 +144,7 @@ module postprocess(
                              .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
    fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
                          .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .CorrDivExp, .DivShiftAmt, .DivShiftIn);
    always_comb
        case(PostProcSelM)
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@ -211,7 +211,7 @@ module resultselect(
    //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
    //      - dont set to zero if fp input is zero but not using the fp input
    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (YInfM&DivOp&~XInfM);//Underflow & ~ResDenorm & (ResExp!=1);
+    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
    // output infinity with result sign if divide by zero
    if(`IEEE754) begin
--- a/pipelined/srt/srt-radix4.sv
+++ b/pipelined/srt/srt-radix4.sv
@ -36,11 +36,14 @@ module srtradix4 (
  input  logic [`NE-1:0] XExpE, YExpE,
  input  logic [`NF:0] XManE, YManE,
  input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic XZeroE,
+  input  logic XInfE, YInfE, 
  input  logic XZeroE, YZeroE, 
  input  logic XNaNE, YNaNE, 
  input  logic       W64, // 32-bit ints on XLEN=64
  input  logic       Signed, // Interpret integers as signed 2's complement
  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
  output logic       DivDone,
  output logic       DivStickyE,
  output logic       DivNegStickyE,
@ -49,10 +52,9 @@ module srtradix4 (
  output logic [`NE+1:0] DivCalcExpE
 );
  // logic           qp, qz, qm; // quotient is +1, 0, or -1
  logic [3:0]     q;
  logic [`NE+1:0] DivCalcExp;
-  logic [`DIVLEN:0]    X;
+  logic [`DIVLEN-1:0]    X;
  logic [`DIVLEN-1:0]  Dpreproc;
  logic [`DIVLEN+3:0]  WS, WSA, WSN;
  logic [`DIVLEN+3:0]  WC, WCA, WCN;
@ -68,13 +70,11 @@ module srtradix4 (
  // When start is asserted, the inputs are loaded into the divider.
  // Otherwise, the divisor is retained and the partial remainder
  // is fed back for the next iteration.
  //  - assumed one is added here since all numbers are normlaized
  //    *** wait what about zero? is that specal case? can the divider handle it?
  //  - when the start signal is asserted X and 0 are loaded into WS and WC
  //  - otherwise load WSA into the flipflop
-  //  *** what does N and A stand for?
+  //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
-  //  *** change shift amount for radix4
+  //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
+  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
  flop   #(`DIVLEN+4) wsflop(clk, WSN, WS);
  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
@ -117,12 +117,11 @@ module srtradix4 (
  //*** change for radix 4
  otfc4 otfc4(.clk, .DivStart, .q, .Quot);
  assign DivStickyE = (WS+WC) != 0; //replace with early termination
  assign DivNegStickyE = $signed(WS+WC) < 0; //replace with early termination
  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
-  divcounter divcounter(clk, DivStart, DivDone);
+  earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
                  .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
 endmodule
@ -130,28 +129,35 @@ endmodule
 // Submodules //
 ////////////////
-/////////////
+module earlytermination(
-// counter //
+  input  logic clk, 
-/////////////
+	input logic [`DIVLEN+3:0] WS, WC,
-module divcounter(input  logic clk, 
+  input  logic XInfE, YInfE, 
  input  logic XZeroE, YZeroE, 
  input  logic XNaNE, YNaNE, 
  input  logic DivStart, 
  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
  output logic DivStickyE,
  output logic DivNegStickyE,
  output logic DivDone);
-   logic    [5:0]  count;
+   logic [$clog2(`DIVLEN/2+3)-1:0]  Count;
-
+   logic WZero;
  // This block of control logic sequences the divider
  // through its iterations.  You may modify it if you
  // build a divider which completes in fewer iterations.
  // You are not responsible for the (trivial) circuit
  // design of the block.
   assign WZero = (WS+WC == 0)|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; //*** temporary
   // *** rather than Counting should just be able to check if one of the two msbs of the quotent is 1 then stop???
  assign DivDone = (DivStickyE | WZero);
  assign DivStickyE = ~|Count;
  assign DivNegStickyE = $signed(WS+WC) < 0;
  assign EarlyTermShiftDiv2E = Count;
  // +1 for setup
  // `DIVLEN/2 to get required number of bits
  // +1 for possible .5 and round bit
  // Count down Counter
  always @(posedge clk)
    begin
-      DivDone = 0;
+      if (DivStart) Count <= #1 `DIVLEN/2+2;
-      if      (count == `DIVLEN/2+1) DivDone <= #1 1;
+      else     Count <= #1 Count-1;
      else if (DivDone | DivStart) DivDone <= #1 0;	
      if (DivStart) count <= #1 0;
      else     count <= #1 count+1;
    end
 endmodule
@ -237,7 +243,7 @@ module srtpreproc (
  input  logic       Signed, // Interpret integers as signed 2's complement
  input  logic       Int, // Choose integer inputs
  input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN:0] X,
+  output logic [`DIVLEN-1:0] X,
  output logic [`DIVLEN-1:0] Dpreproc,
  output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
  output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
@ -245,7 +251,7 @@ module srtpreproc (
 );
  // logic  [`XLEN-1:0] PosA, PosB;
  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`DIVLEN:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
  // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
@ -263,7 +269,7 @@ module srtpreproc (
  // assign PreprocA = ExtraA << zeroCntA;
  // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
@ -300,7 +306,7 @@ module otfc4 (
  // if starting a new divison set Q to 0 and QM to -1
  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
+  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
  // shift Q (quotent) and QM (quotent-1)
@ -331,8 +337,7 @@ module otfc4 (
      QMNext = {QMR, 2'b11};
    end 
  end
-  // Quot is in the range [.5, 2) so normalize the result if nesissary
+  // Final Quoteint is in the range [.5, 2)
  // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
 endmodule
@ -371,7 +376,7 @@ module expcalc(
  output logic [`NE+1:0] DivCalcExp
 );
-  // correct exponent for denormal shifts
+  // correct exponent for denormalized input's normalization shifts
  assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
 endmodule
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -55,6 +55,7 @@ module testbenchfp;
 	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
 	logic [`DIVLEN+2:0] Quot;
  logic CvtResDenormUfE;
  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
  logic DivStart, DivDone;
@ -651,7 +652,7 @@ module testbenchfp;
              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
              .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
-              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), 
              .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
  fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
@ -660,9 +661,9 @@ module testbenchfp;
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .DivStickyE(DivSticky),
+  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
-                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), 
+                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
-                .DivNegStickyE(DivNegSticky), .DivDone, .Quot, .Rem());
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
  assign CmpFlg[3:0] = 0;
@ -815,8 +816,9 @@ end
  ///////////////////////////////////////////////////////////////////////////////////////////////
-    // check if the non-fma test is correct
+    // check if result is correct
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    //  - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
      errors += 1;
      $display("There is an error in %s", Tests[TestNum]);
      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -839,7 +841,7 @@ end
      $stop;
    end
-    if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
+    if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
    if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file