From 8b887755c9b60ae4d13e5b9727f558883e37910c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 02:34:35 -0700 Subject: [PATCH 1/9] Simplified 3:1 mux to 2:1 mux when only Zbkc is supported and clmulr is not needed --- src/ieu/bmu/bitmanipalu.sv | 2 +- src/ieu/bmu/zbc.sv | 36 ++++++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index cd6bc1993..76734f97f 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -93,7 +93,7 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) ( // ZBC and ZBKCUnit if (P.ZBC_SUPPORTED | P.ZBKC_SUPPORTED) begin: zbc - zbc #(P.XLEN) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); + zbc #(P) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); end else assign ZBCResult = '0; // ZBB Unit diff --git a/src/ieu/bmu/zbc.sv b/src/ieu/bmu/zbc.sv index 6e1948c33..cb63eb85a 100644 --- a/src/ieu/bmu/zbc.sv +++ b/src/ieu/bmu/zbc.sv @@ -28,23 +28,31 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module zbc #(parameter WIDTH=32) ( - input logic [WIDTH-1:0] A, RevA, B, // Operands - input logic [2:0] Funct3, // Indicates operation to perform - output logic [WIDTH-1:0] ZBCResult); // ZBC result +module zbc import cvw::*; #(parameter cvw_t P) ( + input logic [P.XLEN-1:0] A, RevA, B, // Operands + input logic [2:0] Funct3, // Indicates operation to perform + output logic [P.XLEN-1:0] ZBCResult); // ZBC result - logic [WIDTH-1:0] ClmulResult, RevClmulResult; - logic [WIDTH-1:0] RevB; - logic [WIDTH-1:0] X, Y; + logic [P.XLEN-1:0] ClmulResult, RevClmulResult; + logic [P.XLEN-1:0] RevB; + logic [P.XLEN-1:0] X, Y; - bitreverse #(WIDTH) brB(B, RevB); + bitreverse #(P.XLEN) brB(B, RevB); - mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X); - mux2 #(WIDTH) ymux(RevB, B, ~Funct3[1], Y); + // choose X = A for clmul, Rev(A) << 1 for clmulh, Rev(A) for clmulr + // unshifted Rev(A) source is only needed for clmulr in ZBC, not in ZBKC + if (P.ZBC_SUPPORTED) + mux3 #(P.XLEN) xmux({RevA[P.XLEN-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X); + else + mux2 #(P.XLEN) xmux(A, {RevA[P.XLEN-2:0], {1'b0}}, Funct3[1], X); - clmul #(WIDTH) clm(.X, .Y, .ClmulResult); - - bitreverse #(WIDTH) brClmulResult(ClmulResult, RevClmulResult); + // choose X = B for clmul, Rev(B) for clmulH + mux2 #(P.XLEN) ymux(B, RevB, Funct3[1], Y); - mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult); + // carry free multiplier + clmul #(P.XLEN) clm(.X, .Y, .ClmulResult); + + // choose result = rev(X @ Y) for clmulh/clmulr + bitreverse #(P.XLEN) brClmulResult(ClmulResult, RevClmulResult); + mux2 #(P.XLEN) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult); endmodule From b4bddf13e64d7f8380813957c447a729e3976170 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 04:40:01 -0700 Subject: [PATCH 2/9] Fixed typo in derivgen --- bin/derivgen.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/derivgen.pl b/bin/derivgen.pl index 442455c53..21ffc7019 100755 --- a/bin/derivgen.pl +++ b/bin/derivgen.pl @@ -90,7 +90,7 @@ foreach my $key (@derivnames) { my $datestring = localtime(); my %hit = (); - print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring usubg derivgen.pl\n"; + print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring using derivgen.pl\n"; foreach my $line (<$unmod>) { foreach my $entry (@{$derivs{$key}}) { my @ent = @{$entry}; From 9bd5bd837b39acb8fe80d64ada62719b40e0761b Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 04:48:24 -0700 Subject: [PATCH 3/9] Removed duplicate bpred 10_16_16 entries from derivlist --- config/derivlist.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/config/derivlist.txt b/config/derivlist.txt index 174ca5191..d2c939a77 100644 --- a/config/derivlist.txt +++ b/config/derivlist.txt @@ -296,9 +296,6 @@ RAS_SIZE 32'd6 deriv bpred_GSHARE_10_10_10_1_rv32gc rv32gc RAS_SIZE 32'd10 -deriv bpred_GSHARE_10_16_10_1_rv32gc rv32gc -RAS_SIZE 32'd16 - deriv bpred_GSHARE_10_16_6_1_rv32gc rv32gc BTB_SIZE 32'd6 @@ -368,9 +365,6 @@ INSTR_CLASS_PRED 0 deriv bpred_GSHARE_10_10_10_0_rv32gc bpred_GSHARE_10_10_10_1_rv32gc INSTR_CLASS_PRED 0 -deriv bpred_GSHARE_10_16_10_0_rv32gc bpred_GSHARE_10_16_10_1_rv32gc -INSTR_CLASS_PRED 0 - deriv bpred_GSHARE_10_16_6_0_rv32gc bpred_GSHARE_10_16_6_1_rv32gc INSTR_CLASS_PRED 0 From 5094122048480057c0b95d46f6927a96f2b64164 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 06:11:55 -0700 Subject: [PATCH 4/9] Simplifying fround --- src/fpu/fpu.sv | 2 +- src/fpu/fround.sv | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 4cf17890c..ba986dadc 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -281,7 +281,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // fround fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE), - .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), + .XNaN(XNaNE), .XSNaN(XSNaNE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), .ZfaFRoundNX(ZfaFRoundNXE), .FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE)); diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv index 64700834a..307b8c4f8 100644 --- a/src/fpu/fround.sv +++ b/src/fpu/fround.sv @@ -34,7 +34,6 @@ module fround import cvw::*; #(parameter cvw_t P) ( input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF) input logic XNaN, // X is NaN input logic XSNaN, // X is Signalling NaN - input logic XZero, // X is Zero input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) input logic [2:0] Frm, // rounding mode input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format @@ -47,7 +46,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( logic [P.NE-1:0] E, Xep1, EminusNf; logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; logic [P.FLEN-1:0] W, PackedW; - logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact; + logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf; // Unbiased exponent assign E = Xe - P.BIAS[P.NE-1:0]; @@ -78,7 +77,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( assign Eeqm1 = ($signed(E) == -1); // Logic for nonnegative mask and rounding bits - assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; + assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; /// if E > Nf, this produces all 0s instead of all 1s. Hence exact handling is needed below. assign Tmasknonneg = ~IMask >>> 1'b1; assign HotE = IMask & ~(IMask << 1'b1); assign HotEP1 = HotE >> 1'b1; @@ -121,7 +120,6 @@ module fround import cvw::*; #(parameter cvw_t P) ( assign EminusNf = E - Nf; /* verilator lint_on WIDTH */ assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive - assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round // Rounding logic: determine whether to round up in magnitude always_comb begin @@ -136,7 +134,8 @@ module fround import cvw::*; #(parameter cvw_t P) ( // If result is not exact, select output in unpacked FLEN format initially if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN - else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 + else if (EgeNf) W = {Xs, Xe, Xm[P.NF-1:0]}; // Exact, no rounding needed + else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1 else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0 else begin // |X| >= 1 rounds to an integer @@ -146,11 +145,10 @@ module fround import cvw::*; #(parameter cvw_t P) ( end end - packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format. - mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound); + packoutput #(P) packoutput(W, Fmt, FRound); // pack and NaN-box based on selected format. // Flags assign FRoundNV = XSNaN; // invalid if input is signaling NaN - assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction + assign FRoundNX = ZfaFRoundNX & ~(XNaN | EgeNf) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction endmodule From 1873064be51f8fdf168b117b6242dfcf2b4f4a0a Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 06:23:42 -0700 Subject: [PATCH 5/9] Simplified fround exact case --- src/fpu/fround.sv | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv index 307b8c4f8..d905618ba 100644 --- a/src/fpu/fround.sv +++ b/src/fpu/fround.sv @@ -43,7 +43,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( output logic FRoundNX // fround inexact ); - logic [P.NE-1:0] E, Xep1, EminusNf; + logic [P.NE-1:0] E, Xep1; logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; logic [P.FLEN-1:0] W, PackedW; logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf; @@ -99,7 +99,7 @@ module fround import cvw::*; #(parameter cvw_t P) ( // if (X is NaN) // W = Canonical NaN // Invalid = (X is signaling NaN) - // else if (E >= Nf or X is +/- 0) + // else if (E >= Nf) // W = X // is exact; this also handles infinity // else // RoundUp = RoundingLogic(Xs, L', R', T', rm) // Table 16.4 @@ -116,10 +116,9 @@ module fround import cvw::*; #(parameter cvw_t P) ( /////////////////////////// // Exact logic - /* verilator lint_off WIDTH */ - assign EminusNf = E - Nf; - /* verilator lint_on WIDTH */ - assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive + // verilator lint_off WIDTHEXPAND + assign EgeNf = (E >= Nf) & Xe[P.NE-1]; // Check if E >= Nf. Also check that Xe is positive to avoid wraparound problems + // verilator lint_on WIDTHEXPAND // Rounding logic: determine whether to round up in magnitude always_comb begin @@ -133,22 +132,22 @@ module fround import cvw::*; #(parameter cvw_t P) ( endcase // If result is not exact, select output in unpacked FLEN format initially - if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN - else if (EgeNf) W = {Xs, Xe, Xm[P.NF-1:0]}; // Exact, no rounding needed - else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 - if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1 - else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0 - else begin // |X| >= 1 rounds to an integer - if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0 - else if (RoundUp) W = {Xs, Xe, Rnd[P.NF-1:0]}; // Round up to Rnd - else W = {Xs, Xe, Trunc[P.NF-1:0]}; // Round down to Trunc + if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN + else if (EgeNf) W = {Xs, Xe, Xm[P.NF-1:0]}; // Exact, no rounding needed + else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 + if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1 + else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0 + else begin // |X| >= 1 rounds to an integer + if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0 + else if (RoundUp) W = {Xs, Xe, Rnd[P.NF-1:0]}; // Round up to Rnd + else W = {Xs, Xe, Trunc[P.NF-1:0]}; // Round down to Trunc end end packoutput #(P) packoutput(W, Fmt, FRound); // pack and NaN-box based on selected format. // Flags - assign FRoundNV = XSNaN; // invalid if input is signaling NaN - assign FRoundNX = ZfaFRoundNX & ~(XNaN | EgeNf) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction + assign FRoundNV = XSNaN; // invalid if input is signaling NaN + assign FRoundNX = ZfaFRoundNX & ~EgeNf & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction endmodule From 4c066c078fa07a6d25975348942f3c7ca451d23e Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 07:38:03 -0700 Subject: [PATCH 6/9] Removing two unnecessary 0's from fmashiftcalc interface --- src/fpu/postproc/fmashiftcalc.sv | 20 +++++++++----------- src/fpu/postproc/postprocess.sv | 9 ++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 27f39e2a5..3a03aff8f 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -28,18 +28,17 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( - input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [P.NE+1:0] FmaSe, // sum's exponent - input logic [P.FMALEN-1:0] FmaSm, // the positive sum + input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [P.NE+1:0] FmaSe, // sum's exponent + input logic [P.FMALEN-1:0] FmaSm, // the positive sum input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // normalization shift count - output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results - output logic FmaSZero, // is the sum zero - output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt, // normalization shift count - output logic [P.FMALEN+1:0] FmaShiftIn + output logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results + output logic FmaSZero, // is the sum zero + output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection + output logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt // normalization shift count ); - logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias - logic [P.NE+1:0] BiasCorr; // correction for bias + logic [P.NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the P.FLEN bias + logic [P.NE+1:0] BiasCorr; // correction for bias /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -130,7 +129,6 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal - assign FmaShiftIn = {2'b0, FmaSm}; if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1; else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 4e893a82e..20968dad7 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -44,7 +44,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic FmaPs, // the product's sign input logic FmaSs, // Sum sign input logic [P.NE+1:0] FmaSe, // the sum's exponent - input logic [P.FMALEN-1:0] FmaSm, // the positive sum + input logic [P.FMALEN-1:0] FmaSm, // the positive sum input logic FmaASticky, // sticky bit that is calculated during alignment input logic [$clog2(P.FMALEN+1)-1:0] FmaSCnt, // the normalization shift count //divide signals @@ -86,7 +86,6 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // fma signals logic [P.NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero - logic [P.FMALEN+1:0] FmaShiftIn; // fma shift input logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma @@ -145,8 +144,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( cvtshiftcalc #(P) cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, - .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); + fmashiftcalc #(P) fmashiftcalc(.FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSm, + .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt); divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); @@ -155,7 +154,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( case(PostProcSel) 2'b10: begin // fma ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.FMALEN-1){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}}; + ShiftIn = {{2'b00, FmaSm}, {P.NORMSHIFTSZ-(P.FMALEN+2){1'b0}}}; end 2'b00: begin // cvt ShiftAmt = {{P.LOGNORMSHIFTSZ-$clog2(P.CVTLEN+1){1'b0}}, CvtShiftAmt}; From 3284dd21126da2166ba2aa6d91e2a893664cfa36 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 07:45:03 -0700 Subject: [PATCH 7/9] Removed unnecessary Zero checking on FmaPreResultSubnorm --- src/fpu/postproc/fmashiftcalc.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 3a03aff8f..1d33f7337 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -78,19 +78,19 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( assign NormSumExp = PreNormSumExp+BiasCorr; end - // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero + // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) if (P.FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-1)); // changed from -2 dh 4/3/24 for issue 655 - assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL; end else if (P.FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF-1)); // changed from -2 dh 4/3/24 for issue 655 assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((P.NE+2)'(P.BIAS-P.BIAS1)); assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF1-1+P.BIAS-P.BIAS1)) | ~|PreNormSumExp; - assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; + assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL); end else if (P.FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; assign Sum0LEZ = PreNormSumExp[P.NE+1] | ~|PreNormSumExp; @@ -101,9 +101,9 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.NF2-1+P.BIAS-P.BIAS2)) | ~|PreNormSumExp; always_comb begin case (Fmt) - P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL; // & ~FmaSZero; // checking sum is not zero is harmless but turns out to be unnecessary - P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL; // & ~FmaSZero; - P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL; // & ~FmaSZero; + P.FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL; + P.FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL; + P.FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL; default: FmaPreResultSubnorm = 1'bx; endcase end @@ -119,10 +119,10 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((P.NE+2)'(-P.H_NF-1+P.BIAS-P.H_BIAS)) | ~|PreNormSumExp; always_comb begin case (Fmt) - 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - 2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - 2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; - 2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; + 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL; + 2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL; + 2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL; + 2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL; endcase end end From e02c1008bc581684ef11902c1f3a54589f071bf8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 10 Jun 2024 07:55:35 -0700 Subject: [PATCH 8/9] postprocessor shift amount simplification --- src/fpu/postproc/divshiftcalc.sv | 4 ---- src/fpu/postproc/fmashiftcalc.sv | 4 ++-- src/fpu/postproc/postprocess.sv | 5 ++--- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index 0a222d724..d45afeea6 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -28,10 +28,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module divshiftcalc import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb:0] DivUm, // divsqrt significand input logic [P.NE+1:0] DivUe, // divsqrt exponent output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount - output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input output logic DivResSubnorm, // is the divsqrt result subnormal output logic DivSubnormShiftPos // is the subnormal shift amount positive ); @@ -68,6 +66,4 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0; assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; - // pre-shift the divider result for normalization - assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; endmodule diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index 1d33f7337..cf334aa9b 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -53,6 +53,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( //convert the sum's exponent into the proper precision if (P.FPSIZES == 1) begin assign NormSumExp = PreNormSumExp; + assign BiasCorr = '0; end else if (P.FPSIZES == 2) begin assign BiasCorr = Fmt ? (P.NE+2)'(0) : (P.NE+2)'(P.BIAS1-P.BIAS); assign NormSumExp = PreNormSumExp+BiasCorr; @@ -129,6 +130,5 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( // set and calculate the shift input and amount // - shift once if killing a product and the result is subnormal - if (P.FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3): FmaSCnt+1; - else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1; + assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(P.FMALEN-1)-1:0]+($clog2(P.FMALEN-1))'(P.NF+3)+BiasCorr[$clog2(P.FMALEN-1)-1:0]: FmaSCnt+1; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 20968dad7..2db03cb16 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -91,7 +91,6 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic [$clog2(P.FMALEN+1)-1:0] FmaShiftAmt; // normalization shift amount for fma // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount - logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift logic DivByZero; // divide by zero flag logic DivResSubnorm; // is the divsqrt result subnormal @@ -147,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( fmashiftcalc #(P) fmashiftcalc(.FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSm, .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt); - divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc #(P) divshiftcalc(.DivUe, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt); // select which unit's output to shift always_comb @@ -162,7 +161,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( end 2'b01: begin //divsqrt ShiftAmt = DivShiftAmt; - ShiftIn = DivShiftIn; + ShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; end default: begin ShiftAmt = {P.LOGNORMSHIFTSZ{1'bx}}; From 29fe5983e266041fc39a88dadc3972380a1ef64c Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 11 Jun 2024 12:32:11 -0700 Subject: [PATCH 9/9] Fixed testfloat regression and added bitmanip/crypto variants --- bin/regression-wally | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/bin/regression-wally b/bin/regression-wally index 4e72fae66..f85800bba 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -99,6 +99,29 @@ derivconfigtests = [ ["zaamo_rv32gc", ["arch32i", "arch32a_amo"]], ["zalrsc_rv32gc", ["arch32i", "wally32a_lrsc"]], +# Bit manipulation and crypto variants + ["zba_rv32gc", ["arch32i", "arch32zba"]], + ["zbb_rv32gc", ["arch32i", "arch32zbb"]], + ["zbc_rv32gc", ["arch32i", "arch32zbc"]], + ["zbs_rv32gc", ["arch32i", "arch32zbs"]], + ["zbkb_rv32gc", ["arch32i", "arch32zbkb"]], + ["zbkc_rv32gc", ["arch32i", "arch32zbkc"]], + ["zbkx_rv32gc", ["arch32i", "arch32zbkx"]], + ["zkne_rv32gc", ["arch32i", "arch32zkne"]], + ["zknd_rv32gc", ["arch32i", "arch32zknd"]], + ["zknh_rv32gc", ["arch32i", "arch32zknh"]], + + ["zba_rv64gc", ["arch64i", "arch64zba"]], + ["zbb_rv64gc", ["arch64i", "arch64zbb"]], + ["zbc_rv64gc", ["arch64i", "arch64zbc"]], + ["zbs_rv64gc", ["arch64i", "arch64zbs"]], + ["zbkb_rv64gc", ["arch64i", "arch64zbkb"]], + ["zbkc_rv64gc", ["arch64i", "arch64zbkc"]], + ["zbkx_rv64gc", ["arch64i", "arch64zbkx"]], + ["zkne_rv64gc", ["arch64i", "arch64zkne"]], + ["zknd_rv64gc", ["arch64i", "arch64zknd"]], + ["zknh_rv64gc", ["arch64i", "arch64zknh"]], + ### add misaligned tests # fp/int divider permutations @@ -325,7 +348,8 @@ else: # run derivative configurations in nightly regression if (nightly): - addTests(tests_buildrootboot, defaultsim) +# addTests(tests_buildrootboot, defaultsim) + addTests(tests_buildrootshort, defaultsim) addTests(derivconfigtests, defaultsim) else: addTests(tests_buildrootshort, defaultsim) @@ -389,7 +413,7 @@ if (testfloat or nightly): # for nightly, run testfloat along with othres tc = TestCase( name=test, variant=config, - cmd="wsim --tb testbench_fp --sim questa " + config + " " + test + " > " + sim_log, + cmd="wsim --tb testbench_fp " + config + " " + test + " > " + sim_log, grepstr="All Tests completed with 0 errors", grepfile = WALLY + "/sim/questa/logs/"+config+"_"+test+".log") configs.append(tc) @@ -415,7 +439,7 @@ def main(): elif '--nightly' in sys.argv: TIMEOUT_DUR = 60*1440 # 1 day elif '--testfloat' in sys.argv: - TIMEOUT_DUR = 5*60 # seconds + TIMEOUT_DUR = 30*60 # seconds else: TIMEOUT_DUR = 10*60 # seconds