From a129e2750246e72fadb983a4699844aa80653abf Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 27 Dec 2022 15:07:01 -0600 Subject: [PATCH 01/14] signal name changes in ram2p. --- pipelined/src/generic/mem/ram2p1r1wb.sv | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pipelined/src/generic/mem/ram2p1r1wb.sv b/pipelined/src/generic/mem/ram2p1r1wb.sv index c11246d3..7ded3a20 100644 --- a/pipelined/src/generic/mem/ram2p1r1wb.sv +++ b/pipelined/src/generic/mem/ram2p1r1wb.sv @@ -49,21 +49,21 @@ module ram2p1r1wb input logic reset, // port 1 is read only - input logic [DEPTH-1:0] RA1, - output logic [WIDTH-1:0] RD1, - input logic REN1, + input logic [DEPTH-1:0] ra1, + output logic [WIDTH-1:0] rd1, + input logic ren1, // port 2 is write only - input logic [DEPTH-1:0] WA1, - input logic [WIDTH-1:0] WD1, - input logic WEN1, - input logic [WIDTH-1:0] BitWEN1 + input logic [DEPTH-1:0] wa2, + input logic [WIDTH-1:0] wd2, + input logic wen2, + input logic [WIDTH-1:0] bwe2 ); - logic [DEPTH-1:0] RA1Q, WA1Q; - logic WEN1Q; - logic [WIDTH-1:0] WD1Q; + logic [DEPTH-1:0] ra1q, wa2q; + logic wen2q; + logic [width-1:0] wd2q; logic [WIDTH-1:0] mem[2**DEPTH-1:0]; logic [WIDTH-1:0] bwe; @@ -76,18 +76,18 @@ module ram2p1r1wb // prefer not to have two-cycle write latency // will require branch predictor changes - flopenr #(DEPTH) RA1Reg(clk, reset, REN1, RA1, RA1Q); - flopenr #(DEPTH) WA1Reg(clk, reset, REN1, WA1, WA1Q); - flopr #(1) WEN1Reg(clk, reset, WEN1, WEN1Q); - flopenr #(WIDTH) WD1Reg(clk, reset, REN1, WD1, WD1Q); + flopenr #(DEPTH) ra1Reg(clk, reset, ren1, ra1, ra1q); + flopenr #(DEPTH) wa2Reg(clk, reset, ren1, wa2, wa2q); + flopr #(1) wen2Reg(clk, reset, wen2, wen2q); + flopenr #(WIDTH) wd2Reg(clk, reset, ren1, wd2, wd2q); // read port - assign RD1 = mem[RA1Q]; + assign rd1 = mem[ra1q]; // write port - assign bwe = {WIDTH{WEN1Q}} & BitWEN1; + assign bwe = {WIDTH{wen2q}} & bwe2; always_ff @(posedge clk) - mem[WA1Q] <= WD1Q & bwe | mem[WA1Q] & ~bwe; + mem[wa2q] <= wd2q & bwe | mem[wa2q] & ~bwe; endmodule From 654b10894c270c10f28d188b03603250f1e54af3 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 29 Dec 2022 17:07:50 -0600 Subject: [PATCH 02/14] Re-enabled the branch predictor in rv64gc. --- pipelined/config/rv64gc/wally-config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 6ca3a56c..65d27aa3 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -139,7 +139,7 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define BPRED_ENABLED 0 +`define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 `define BPRED_SIZE 10 From 872ff619e397b1fe63bbf1a4dca878d6642c63bb Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Thu, 29 Dec 2022 17:13:48 -0600 Subject: [PATCH 03/14] Fixed problems with changes to ram2p. --- pipelined/src/generic/mem/ram2p1r1wb.sv | 2 +- pipelined/src/ifu/BTBPredictor.sv | 14 +++++++------- pipelined/src/ifu/globalHistoryPredictor.sv | 14 +++++++------- pipelined/src/ifu/gsharePredictor.sv | 14 +++++++------- pipelined/src/ifu/localHistoryPredictor.sv | 14 +++++++------- pipelined/src/ifu/twoBitPredictor.sv | 14 +++++++------- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/pipelined/src/generic/mem/ram2p1r1wb.sv b/pipelined/src/generic/mem/ram2p1r1wb.sv index 7ded3a20..dac1290f 100644 --- a/pipelined/src/generic/mem/ram2p1r1wb.sv +++ b/pipelined/src/generic/mem/ram2p1r1wb.sv @@ -63,7 +63,7 @@ module ram2p1r1wb logic [DEPTH-1:0] ra1q, wa2q; logic wen2q; - logic [width-1:0] wd2q; + logic [WIDTH-1:0] wd2q; logic [WIDTH-1:0] mem[2**DEPTH-1:0]; logic [WIDTH-1:0] bwe; diff --git a/pipelined/src/ifu/BTBPredictor.sv b/pipelined/src/ifu/BTBPredictor.sv index f03bbdd5..3c90497e 100644 --- a/pipelined/src/ifu/BTBPredictor.sv +++ b/pipelined/src/ifu/BTBPredictor.sv @@ -105,13 +105,13 @@ module BTBPredictor // *** optimize for byte write enables ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk), .reset(reset), - .RA1(LookUpPCIndex), - .RD1({{InstrClass, TargetPC}}), - .REN1(~StallF), - .WA1(UpdatePCIndex), - .WD1({UpdateInstrClass, UpdateTarget}), - .WEN1(UpdateEN), - .BitWEN1({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right. + .ra1(LookUpPCIndex), + .rd1({{InstrClass, TargetPC}}), + .ren1(~StallF), + .wa2(UpdatePCIndex), + .wd2({UpdateInstrClass, UpdateTarget}), + .wen2(UpdateEN), + .bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right. endmodule diff --git a/pipelined/src/ifu/globalHistoryPredictor.sv b/pipelined/src/ifu/globalHistoryPredictor.sv index 6d06dc8c..29cb735d 100644 --- a/pipelined/src/ifu/globalHistoryPredictor.sv +++ b/pipelined/src/ifu/globalHistoryPredictor.sv @@ -116,12 +116,12 @@ module globalHistoryPredictor ram2p1r1wb #(k, 2) PHT(.clk(clk), .reset(reset), //.RA1(GHR[k-1:0]), - .RA1(GHRLookup), - .RD1(BPPredF), - .REN1(~StallF), - .WA1(PHTUpdateAdr), - .WD1(UpdateBPPredE), - .WEN1(PHTUpdateEN), - .BitWEN1(2'b11)); + .ra1(GHRLookup), + .rd1(BPPredF), + .ren1(~StallF), + .wa2(PHTUpdateAdr), + .wd2(UpdateBPPredE), + .wen2(PHTUpdateEN), + .bwe2(2'b11)); endmodule diff --git a/pipelined/src/ifu/gsharePredictor.sv b/pipelined/src/ifu/gsharePredictor.sv index ff111a3e..fa780194 100644 --- a/pipelined/src/ifu/gsharePredictor.sv +++ b/pipelined/src/ifu/gsharePredictor.sv @@ -113,12 +113,12 @@ module gsharePredictor ram2p1r1wb #(`BPRED_SIZE, 2) PHT(.clk(clk), .reset(reset), //.RA1(GHR[`BPRED_SIZE-1:0]), - .RA1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]), - .RD1(BPPredF), - .REN1(~StallF), - .WA1(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]), - .WD1(UpdateBPPredE), - .WEN1(PHTUpdateEN), - .BitWEN1(2'b11)); + .ra1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]), + .rd1(BPPredF), + .ren1(~StallF), + .wa2(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]), + .wd2(UpdateBPPredE), + .wen2(PHTUpdateEN), + .bwe2(2'b11)); endmodule // gsharePredictor diff --git a/pipelined/src/ifu/localHistoryPredictor.sv b/pipelined/src/ifu/localHistoryPredictor.sv index 97b2b6f5..02ad4cf1 100644 --- a/pipelined/src/ifu/localHistoryPredictor.sv +++ b/pipelined/src/ifu/localHistoryPredictor.sv @@ -86,13 +86,13 @@ module localHistoryPredictor // LHRE refers to the address that the past k branches points to in the exectution stage ram2p1r1wb #(k, 2) PHT(.clk(clk), .reset(reset), - .RA1(ForwardLHRNext), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(LHRFNext), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); + .ra1(ForwardLHRNext), + .rd1(PredictionMemory), + .ren1(~StallF), + .wa2(LHRFNext), + .wd2(UpdatePrediction), + .wen2(UpdateEN), + .bwe2(2'b11)); diff --git a/pipelined/src/ifu/twoBitPredictor.sv b/pipelined/src/ifu/twoBitPredictor.sv index 5ffb29d3..7459ea6a 100644 --- a/pipelined/src/ifu/twoBitPredictor.sv +++ b/pipelined/src/ifu/twoBitPredictor.sv @@ -62,13 +62,13 @@ module twoBitPredictor ram2p1r1wb #(Depth, 2) PHT(.clk(clk), .reset(reset), - .RA1(LookUpPCIndex), - .RD1(PredictionMemory), - .REN1(~StallF), - .WA1(UpdatePCIndex), - .WD1(UpdatePrediction), - .WEN1(UpdateEN), - .BitWEN1(2'b11)); + .ra1(LookUpPCIndex), + .rd1(PredictionMemory), + .ren1(~StallF), + .wa2(UpdatePCIndex), + .wd2(UpdatePrediction), + .wen2(UpdateEN), + .bwe2(2'b11)); // need to forward when updating to the same address as reading. // first we compare to see if the update and lookup addreses are the same From ef37070eee0217b60f0d539cda24e92bb4327b29 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 29 Dec 2022 21:09:23 -0800 Subject: [PATCH 04/14] Fixed register timing failure on SpecialCaseM in fdivsqrt --- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index a5735ba3..c16abd9b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -69,7 +69,8 @@ module fdivsqrtfsm( assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE; end else assign SpecialCaseE = FSpecialCaseE; - flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc + //flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc + flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 // NS = NF + 1 From e9b314f9023e3be52825c66d20576ef75bd788ca Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:40:25 -0800 Subject: [PATCH 05/14] fdiv cleanup, reduce number of rv32f fma_b15 tests being run to speed up regression --- pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv | 1 - pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 16 +++++++--------- pipelined/testbench/tests.vh | 4 ++-- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv index c16abd9b..a950ea7b 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -69,7 +69,6 @@ module fdivsqrtfsm( assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered? assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE; end else assign SpecialCaseE = FSpecialCaseE; - //flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc // DIVN = `NF+3 diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 8bedd384..6d955d61 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -157,13 +157,6 @@ module fdivsqrtpostproc( end else begin NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); PreResultM = IntQuotM; - /* - if (~ALTBM & NegQuotM) begin - PreResultM = {3'b111, -IntQuotM}; - end else begin - PreResultM = {3'b000, IntQuotM}; - end*/ - //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender end @@ -171,7 +164,12 @@ module fdivsqrtpostproc( assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases - // *** conditional on RV64 - assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 + + // sign extend result for W64 + if (`XLEN==64) + assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : + SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64 + else + assign FPIntDivResultM = SpecialFPIntDivResultM[`XLEN-1:0]; end endmodule \ No newline at end of file diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 61e45d9e..48a29303 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1098,7 +1098,7 @@ string imperas32f[] = '{ "rv64i_m/F/src/flw-align-01.S", "rv64i_m/F/src/fmadd_b1-01.S", "rv64i_m/F/src/fmadd_b14-01.S", - "rv64i_m/F/src/fmadd_b15-01.S", + //"rv64i_m/F/src/fmadd_b15-01.S", "rv64i_m/F/src/fmadd_b16-01.S", "rv64i_m/F/src/fmadd_b17-01.S", "rv64i_m/F/src/fmadd_b18-01.S", @@ -1473,7 +1473,7 @@ string imperas32f[] = '{ "rv32i_m/F/src/fmin_b19-01.S", "rv32i_m/F/src/fmsub_b1-01.S", "rv32i_m/F/src/fmsub_b14-01.S", - "rv32i_m/F/src/fmsub_b15-01.S", + //"rv32i_m/F/src/fmsub_b15-01.S", "rv32i_m/F/src/fmsub_b16-01.S", "rv32i_m/F/src/fmsub_b17-01.S", "rv32i_m/F/src/fmsub_b18-01.S", From 0e9bd5dab558910d5b577b5660b5790f71991eab Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:45:51 -0800 Subject: [PATCH 06/14] fdivsqrtpreproc shift simplification --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b3f42a7c..cb883365 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -57,7 +57,6 @@ module fdivsqrtpreproc ( // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`DIVBLEN:0] mE; - logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; logic [`LOGRK:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; @@ -71,6 +70,7 @@ module fdivsqrtpreproc ( logic AsE, BsE, ALTBE, NegQuotE; logic [`XLEN-1:0] AE, BE; logic [`XLEN-1:0] PosA, PosB; + logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; @@ -108,12 +108,12 @@ module fdivsqrtpreproc ( /* verilator lint_off WIDTH */ // right shift amount to complete in discrete number of steps - assign pPlusr = (`DIVBLEN)'(`LOGR) + p; + assign pPlusr = `LOGR + p; assign pPrTrunc = pPlusr % `RK; - assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)}; - assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1}; - assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK); + assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc; + assign nE = (pPrCeil * `DIVCOPIES) - 1; + assign IntBits = `LOGR + p - 1; + assign RightShiftX = `RK - 1 - IntBits % `RK; /* verilator lint_on WIDTH */ // Selet integer or floating-point operands From dba3ffe767f27f3719b75a381ca793998bd66e95 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:47:40 -0800 Subject: [PATCH 07/14] Reduced size of preproc right shift --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index cb883365..d0a06079 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -70,7 +70,8 @@ module fdivsqrtpreproc ( logic AsE, BsE, ALTBE, NegQuotE; logic [`XLEN-1:0] AE, BE; logic [`XLEN-1:0] PosA, PosB; - logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; + logic [`DIVBLEN:0] ZeroDiff, IntBits; + logic [`LOGRK-1:0] RightShiftX; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; From 4fb839686750a27cccd0769c5bf2aaf7f3e69180 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 06:55:20 -0800 Subject: [PATCH 08/14] Clean up sqrt initialization mux --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index d0a06079..e5645674 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -51,20 +51,14 @@ module fdivsqrtpreproc ( ); logic [`DIVb-1:0] XPreproc; - logic [`DIVb:0] SqrtX; - logic [`DIVb+3:0] DivX; + logic [`DIVb:0] PreSqrtX; + logic [`DIVb+3:0] DivX, SqrtX; logic [`NE+1:0] QeE; - // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; - logic [`DIVBLEN:0] mE; - logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell; - logic [`LOGRK:0] pPrTrunc; + logic [`DIVBLEN:0] mE, ell; logic [`DIVb+3:0] PreShiftX; logic NumZeroE; - // ***can probably merge X LZC with conversion - // cout the number of leading zeros - if (`IDIV_ON_FPU) begin logic signedDiv; logic AsE, BsE, ALTBE, NegQuotE; @@ -72,6 +66,8 @@ module fdivsqrtpreproc ( logic [`XLEN-1:0] PosA, PosB; logic [`DIVBLEN:0] ZeroDiff, IntBits; logic [`LOGRK-1:0] RightShiftX; + logic [`DIVBLEN:0] pPlusr, pPrCeil, p; + logic [`LOGRK-1:0] pPrTrunc; // Extract inputs, signs, zero, depending on W64 mode if applicable assign signedDiv = ~Funct3E[0]; @@ -149,16 +145,15 @@ module fdivsqrtpreproc ( assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // append leading 1 (for nonzero inputs) and zero-extend - assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF + assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF assign DivX = {3'b000, ~NumZeroE, XPreproc}; - - // *** explain why X is shifted between radices (initial assignment of WS=RX) - if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX; - else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX; - + // Sqrt is initialized after a first step of R(X-1), which depends on Radix + if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; + else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; + assign PreShiftX = Sqrt ? SqrtX : DivX; + // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - - flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); endmodule From 3c475455d9208ba5a1cbfb28a698cd11ad2b02fb Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:00:48 -0800 Subject: [PATCH 09/14] Clean up sqrt preproc --- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index e5645674..63d391ae 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -104,7 +104,7 @@ module fdivsqrtpreproc ( assign p = ALTBE ? '0 : ZeroDiff; /* verilator lint_off WIDTH */ - // right shift amount to complete in discrete number of steps + // calculate number of cycles nE right shift amount RightShiftX to complete in discrete number of steps assign pPlusr = `LOGR + p; assign pPrTrunc = pPlusr % `RK; assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc; @@ -145,6 +145,7 @@ module fdivsqrtpreproc ( assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1}); // append leading 1 (for nonzero inputs) and zero-extend + // *** explain this next line assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF assign DivX = {3'b000, ~NumZeroE, XPreproc}; // Sqrt is initialized after a first step of R(X-1), which depends on Radix From ba976d66e40868e8cac69764dfa833aad1bcb73c Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:01:44 -0800 Subject: [PATCH 10/14] Radix 4 divsqrt --- pipelined/config/shared/wally-shared.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index cc24c42f..044bd7d7 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -109,7 +109,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3) // `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input From 61230c967ceb460022f7db428267ae838a90085c Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:10:47 -0800 Subject: [PATCH 11/14] simplified sign handling mux --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 6d955d61..1dd11b3f 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -52,9 +52,6 @@ module fdivsqrtpostproc( logic [`DIVb:0] PreQmM; logic NegStickyM; logic weq0E, weq0M, WZeroM; - logic [`DIVBLEN:0] NormShiftM; - logic [`DIVb:0] NormQuotM; - logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM; logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM; logic [`XLEN-1:0] SpecialFPIntDivResultM; @@ -104,27 +101,17 @@ module fdivsqrtpostproc( assign QmM = SqrtM ? (PreQmM << 1) : PreQmM; if (`IDIV_ON_FPU) begin + logic [`DIVBLEN:0] NormShiftM; + logic [`DIVb:0] NormQuotM; + logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM; + assign W = $signed(Sum) >>> `LOGR; assign DM = {4'b0001, D}; // Integer division: sign handling for div and rem - always_comb - if (~AsM) - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = W + DM; - end else begin - NormQuotM = FirstU; - NormRemM = W; - end - else - if (NegStickyM) begin - NormQuotM = FirstUM; - NormRemM = -(W + DM); - end else begin - NormQuotM = FirstU; - NormRemM = -W; - end + mux2 #(`DIVb+1) normquotmux(FirstU, FirstUM, NegStickyM, NormQuotM); + mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); + mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); // Integer division: Special cases always_comb From 30dc45c76408a1d88293fa79f6dae4037a359bbd Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:17:38 -0800 Subject: [PATCH 12/14] removed duplicate quotient mux --- pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 1dd11b3f..258f0eb5 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -102,14 +102,12 @@ module fdivsqrtpostproc( if (`IDIV_ON_FPU) begin logic [`DIVBLEN:0] NormShiftM; - logic [`DIVb:0] NormQuotM; logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM; assign W = $signed(Sum) >>> `LOGR; assign DM = {4'b0001, D}; - // Integer division: sign handling for div and rem - mux2 #(`DIVb+1) normquotmux(FirstU, FirstUM, NegStickyM, NormQuotM); + // Integer remainder: sticky and sign correction muxes mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); @@ -129,7 +127,7 @@ module fdivsqrtpostproc( IntRemM = '0; end end else begin - PreIntQuotM = {3'b000, NormQuotM}; + PreIntQuotM = {3'b000, PreQmM}; IntRemM = NormRemM; end // flip sign if necessary @@ -147,7 +145,7 @@ module fdivsqrtpostproc( end - // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted + // integer division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases From bd16fd79d42506a7263f8180f0889ac81aa0fb4d Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:34:26 -0800 Subject: [PATCH 13/14] started simplifying integer division special cases --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 258f0eb5..80d9e4b0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -111,6 +111,43 @@ module fdivsqrtpostproc( mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); + // special case logic + always_comb + if (ALTBM) begin + if (RemOpM) PreFPIntDivResultM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; + else PreFPIntDivResultM = '0; + // IntQuotM = '0; + // IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; + end else begin + logic [`DIVb+3:0] PreIntQuotM; + if (WZeroM) begin + if (weq0M) begin + PreIntQuotM = {3'b000, FirstU}; + IntRemM = '0; + end else begin + PreIntQuotM = {3'b000, FirstUM}; + IntRemM = '0; + end + end else begin + PreIntQuotM = {3'b000, PreQmM}; + IntRemM = NormRemM; + end + // flip sign if necessary + if (NegQuotM) IntQuotM = -PreIntQuotM; + else IntQuotM = PreIntQuotM; + if (RemOpM) begin + NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder + PreResultM = IntRemM; + end else begin + NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); + PreResultM = IntQuotM; + end + PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); + end + + assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases + +/* // Integer division: Special cases always_comb if (ALTBM) begin @@ -149,6 +186,7 @@ module fdivsqrtpostproc( assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases +*/ // sign extend result for W64 if (`XLEN==64) From 58218dbdd178dbc980f22432e2c5be845ca4f03e Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 30 Dec 2022 07:40:28 -0800 Subject: [PATCH 14/14] continued simplifying integer division special cases --- .../src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 53 +++---------------- 1 file changed, 7 insertions(+), 46 deletions(-) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 80d9e4b0..c78738a4 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -113,9 +113,12 @@ module fdivsqrtpostproc( // special case logic always_comb - if (ALTBM) begin - if (RemOpM) PreFPIntDivResultM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; - else PreFPIntDivResultM = '0; + if (BZeroM) begin + if (RemOpM) SpecialFPIntDivResultM = AM; + else SpecialFPIntDivResultM = {(`XLEN){1'b1}}; + end else if (ALTBM) begin + if (RemOpM) SpecialFPIntDivResultM = AM; + else SpecialFPIntDivResultM = '0; // IntQuotM = '0; // IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; end else begin @@ -143,51 +146,9 @@ module fdivsqrtpostproc( PreResultM = IntQuotM; end PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); + SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0]; end - assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases - -/* - // Integer division: Special cases - always_comb - if (ALTBM) begin - IntQuotM = '0; - IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM}; - end else begin - logic [`DIVb+3:0] PreIntQuotM; - if (WZeroM) begin - if (weq0M) begin - PreIntQuotM = {3'b000, FirstU}; - IntRemM = '0; - end else begin - PreIntQuotM = {3'b000, FirstUM}; - IntRemM = '0; - end - end else begin - PreIntQuotM = {3'b000, PreQmM}; - IntRemM = NormRemM; - end - // flip sign if necessary - if (NegQuotM) IntQuotM = -PreIntQuotM; - else IntQuotM = PreIntQuotM; - end - - always_comb - if (RemOpM) begin - NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder - PreResultM = IntRemM; - end else begin - NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); - PreResultM = IntQuotM; - end - - - // integer division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted - - assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); - assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases -*/ - // sign extend result for W64 if (`XLEN==64) assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} :