From a82cf3d0baf3301a7d2346f694e0401dfa143c23 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 29 Aug 2022 04:32:27 -0700 Subject: [PATCH 1/5] Simplify FSM --- pipelined/src/fpu/fdivsqrtfsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index c4780bcee..d506ad3b1 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -103,7 +103,7 @@ module fdivsqrtfsm( if (SpecialCase) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin + if ((step == 1) | WZero) begin state <= #1 DONE; end step <= step - 1; From a6efbb3fda9b93966664ab52e999e721ab026ca5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 29 Aug 2022 12:01:09 -0700 Subject: [PATCH 2/5] Initial FDIVSQRT simplification working --- pipelined/src/fpu/fdivsqrtfsm.sv | 12 +++++++----- pipelined/src/fpu/fdivsqrtpreproc.sv | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index d506ad3b1..42ae7b15b 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -66,6 +66,7 @@ module fdivsqrtfsm( //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVb+3:0] W; logic SpecialCase; + logic WZeroDelayed; // *** later remove //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); @@ -87,7 +88,8 @@ module fdivsqrtfsm( assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); assign DivSE = |W; end - assign DivDone = (state == DONE); + flopr #(1) WZeroReg(clk, reset | DivStart, WZero, WZeroDelayed); + assign DivDone = (state == DONE) | WZeroDelayed; assign W = WC+WS; assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; @@ -102,14 +104,14 @@ module fdivsqrtfsm( step <= Dur; if (SpecialCase) state <= #1 DONE; else state <= #1 BUSY; + end else if (DivDone) begin + if (StallM) state <= #1 DONE; + else state <= #1 IDLE; end else if (state == BUSY) begin - if ((step == 1) | WZero) begin + if (step == 0) begin state <= #1 DONE; end step <= step - 1; - end else if (state == DONE) begin - if (StallM) state <= #1 DONE; - else state <= #1 IDLE; end end endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrtpreproc.sv index 0338c2b83..b835c2677 100644 --- a/pipelined/src/fpu/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrtpreproc.sv @@ -73,7 +73,7 @@ module fdivsqrtpreproc ( assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign X = Sqrt ? {SqrtX, {`DIVb-1-`NF{1'b0}}} : {~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; - assign Dur = (`DURLEN)'(`FPDUR); + assign Dur = (`DURLEN)'(`FPDUR-1); // radix 2 radix 4 // 1 copies DIVLEN+2 DIVLEN+2/2 From 28db4fdc70ef6a12f7a005a409682a01f568c7fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 29 Aug 2022 13:01:32 -0700 Subject: [PATCH 3/5] commented out lines to have divider work again --- pipelined/src/fpu/fdivsqrtfsm.sv | 24 ++++++++++++++++++++---- pipelined/src/fpu/fdivsqrtpreproc.sv | 2 +- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index 42ae7b15b..94a911d5c 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -66,7 +66,7 @@ module fdivsqrtfsm( //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVb+3:0] W; logic SpecialCase; - logic WZeroDelayed; // *** later remove + logic WZeroDelayed, WZeroD; // *** later remove //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); @@ -83,13 +83,28 @@ module fdivsqrtfsm( assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; // *** |... for continual -1 is not efficent fix - also only needed for radix-2 assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0})|(((NextWSN+NextWCN+FZero)==0)&qn[`DIVCOPIES-1]); - assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn + assign DivSE = |W&~((W+FSticky)==0); //***not efficent fix == and need the & qn *** use next cycle end else begin assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVb+2:0]|NextWCN[`DIVb+2:0], 1'b0}); assign DivSE = |W; end + + if (`RADIX == 2) begin + logic [`DIVb+3:0] FZero, FSticky; + logic [`DIVb+2:0] LastK, FirstK; + assign LastK = ({3'b111, LastC} & ~({3'b111, LastC} << 1)); + assign FirstK = ({3'b111, FirstC<<1} & ~({3'b111, FirstC<<1} << 1)); + assign FZero = SqrtM ? {LastSM[`DIVb], LastSM, 2'b0} | {LastK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + assign FSticky = SqrtM ? {FirstSM[`DIVb], FirstSM, 2'b0} | {FirstK,1'b0} : {3'b1,D,{`DIVb-`DIVN+2{1'b0}}}; + // *** |... for continual -1 is not efficent fix - also only needed for radix-2 + assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0})|(((WS+WC+FZero)==0)&qn[`DIVCOPIES-1]); + end else begin + assign WZeroD = ((WS^WC)=={WS[`DIVb+2:0]|WC[`DIVb+2:0], 1'b0}); + end + flopr #(1) WZeroReg(clk, reset | DivStart, WZero, WZeroDelayed); - assign DivDone = (state == DONE) | WZeroDelayed; + assign DivDone = (state == DONE); +// assign DivDone = (state == DONE) | (WZeroDelayed & (state == BUSY)); assign W = WC+WS; assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; @@ -108,7 +123,8 @@ module fdivsqrtfsm( if (StallM) state <= #1 DONE; else state <= #1 IDLE; end else if (state == BUSY) begin - if (step == 0) begin + if (step == 1 | WZero ) begin +// if (step == 1 /* | WZero */) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/src/fpu/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrtpreproc.sv index b835c2677..0338c2b83 100644 --- a/pipelined/src/fpu/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrtpreproc.sv @@ -73,7 +73,7 @@ module fdivsqrtpreproc ( assign SqrtX = Xe[0]^XZeroCnt[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0}; assign X = Sqrt ? {SqrtX, {`DIVb-1-`NF{1'b0}}} : {~XZero, PreprocX, {`DIVb-`NF{1'b0}}}; assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}}; - assign Dur = (`DURLEN)'(`FPDUR-1); + assign Dur = (`DURLEN)'(`FPDUR); // radix 2 radix 4 // 1 copies DIVLEN+2 DIVLEN+2/2 From e1760dde55cf4690a1d4d1d2904c955be967afd0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 30 Aug 2022 10:55:21 -0700 Subject: [PATCH 4/5] Fixed checking termination in testfloat testbench --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/regression/testfloat.do | 2 +- pipelined/src/fpu/fdivsqrtfsm.sv | 8 ++++---- pipelined/testbench/testbench-fp.sv | 12 +++++++----- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index e40506c7a..27df384fe 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,7 +101,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h2 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index 4f7812a82..641781354 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -34,7 +34,7 @@ vlib work # $num = the added words after the call vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 -vsim -voptargs=+acc work.testbenchfp -G TEST=$2 +vsim -voptargs=+acc work.testbenchfp -G TEST=$2 -suppress 4014 view wave #-- display input and output signals as hexidecimal values diff --git a/pipelined/src/fpu/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrtfsm.sv index 94a911d5c..6ab1fca59 100644 --- a/pipelined/src/fpu/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrtfsm.sv @@ -103,8 +103,8 @@ module fdivsqrtfsm( end flopr #(1) WZeroReg(clk, reset | DivStart, WZero, WZeroDelayed); - assign DivDone = (state == DONE); -// assign DivDone = (state == DONE) | (WZeroDelayed & (state == BUSY)); +// assign DivDone = (state == DONE); + assign DivDone = (state == DONE) | (WZeroDelayed & (state == BUSY)); assign W = WC+WS; assign NegSticky = W[`DIVb+3]; assign EarlyTermShiftE = step; @@ -123,8 +123,8 @@ module fdivsqrtfsm( if (StallM) state <= #1 DONE; else state <= #1 IDLE; end else if (state == BUSY) begin - if (step == 1 | WZero ) begin -// if (step == 1 /* | WZero */) begin +// if (step == 1 | WZero ) begin + if (step == 1 /* | WZero */) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 2a5e6b4ff..d0f18a687 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -792,8 +792,9 @@ always_comb begin `CVTFPUNIT: ResFlg = Flg; endcase end - // check results on falling edge of clk - always @(negedge clk) begin + +// check results on falling edge of clk +always @(negedge clk) begin // check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify: @@ -860,10 +861,11 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + // if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone | (TEST != "sqrt" & TEST != "div"))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; - $display("There is an error in %s", Tests[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $display("Error in %s", Tests[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end From bdeb5c6509607e7ec0520f54e30e4d51b3ecf765 Mon Sep 17 00:00:00 2001 From: DTowersM Date: Wed, 31 Aug 2022 00:17:58 +0000 Subject: [PATCH 5/5] fixed qrduino keyerror in embench test --- pipelined/testbench/tests.vh | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index f1d96e154..5ce84f53d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -61,6 +61,7 @@ string tvpaths[] = '{ "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", // "bd_speedopt_speed/src/primecount/primecount", + "bd_speedopt_speed/src/qrduino/qrduino", "bd_speedopt_speed/src/sglib-combined/sglib-combined", "bd_speedopt_speed/src/slre/slre", "bd_speedopt_speed/src/st/st",