diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index e40506c7a..27df384fe 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,7 +101,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index 4f7812a82..641781354 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -34,7 +34,7 @@ vlib work # $num = the added words after the call vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 -vsim -voptargs=+acc work.testbenchfp -G TEST=$2 +vsim -voptargs=+acc work.testbenchfp -G TEST=$2 -suppress 4014 view wave #-- display input and output signals as hexidecimal values diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index c4780bcee..6ab1fca59 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -66,6 +66,7 @@ module fdivsqrtfsm( //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVb+3:0] W; logic SpecialCase; + logic WZeroDelayed, WZeroD; // *** later remove //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); @@ -82,12 +83,28 @@ module fdivsqrtfsm( assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... 
for continual -1 is not efficent fix - also only needed for radix-2 assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]); - assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn + assign DivSE = |W&~((W+FSticky)==0); //***not efficient fix == and need the & qn *** use next cycle end else begin assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); assign DivSE = |W; end - assign DivDone = (state == DONE); + + if (`RADIX == 2) begin + logic [`DIVb+3:0] FZero, FSticky; + logic [`DIVb+2:0] LastK, FirstK; + assign LastK = ({3'b111, LastC} & ~({3'b111, LastC} << 1)); + assign FirstK = ({3'b111, FirstC<<1} & ~({3'b111, FirstC<<1} << 1)); + assign FZero = SqrtM ? {LastSM[`DIVb], LastSM, 2'b0} | {LastK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + // *** |... for continual -1 is not efficient fix - also only needed for radix-2 + assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0})|(((WS+WC+FZero)==0)&qn[`DIVCOPIES-1]); + end else begin + assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0}); + end + + flopr #(1) WZeroReg(clk, reset | DivStart, WZero, WZeroDelayed); +// assign DivDone = (state == DONE); + assign DivDone = (state == DONE) | (WZeroDelayed & (state == BUSY)); assign W = WC+WS; assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; @@ -102,14 +119,15 @@ module fdivsqrtfsm( step <= Dur; if (SpecialCase) state <= #1 DONE; else state <= #1 BUSY; + end else if (DivDone) begin + if (StallM) state <= #1 DONE; + else state <= #1 IDLE; end else if (state == BUSY) begin - if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin +// if (step == 1 | WZero ) begin + if (step == 1 /* | WZero */) begin state <= #1 DONE; end step <= step - 1; - end else if (state == DONE) begin - if (StallM) state <= #1 DONE; - else state <= #1 
IDLE; end end endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 2a5e6b4ff..d0f18a687 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -792,8 +792,9 @@ always_comb begin `CVTFPUNIT: ResFlg = Flg; endcase end - // check results on falling edge of clk - always @(negedge clk) begin + +// check results on falling edge of clk +always @(negedge clk) begin // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify: @@ -860,10 +861,11 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + // if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone | (TEST != "sqrt" & TEST != "div"))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; - $display("There is an error in %s", Tests[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $display("Error in %s", Tests[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index f1d96e154..5ce84f53d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -61,6 +61,7 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", + 
"bd_speedopt_speed/src/qrduino/qrduino", "bd_speedopt_speed/src/sglib-combined/sglib-combined", "bd_speedopt_speed/src/slre/slre", "bd_speedopt_speed/src/st/st",