Merge pull request #829 from davidharrishmc/dev

Various small optimizations
This commit is contained in:
Rose Thompson 2024-06-10 12:40:49 -07:00 committed by GitHub
commit c5b1338697
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 44 additions and 45 deletions

View File

@ -90,7 +90,7 @@ foreach my $key (@derivnames) {
my $datestring = localtime(); my $datestring = localtime();
my %hit = (); my %hit = ();
print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring usubg derivgen.pl\n"; print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring using derivgen.pl\n";
foreach my $line (<$unmod>) { foreach my $line (<$unmod>) {
foreach my $entry (@{$derivs{$key}}) { foreach my $entry (@{$derivs{$key}}) {
my @ent = @{$entry}; my @ent = @{$entry};

View File

@ -296,9 +296,6 @@ RAS_SIZE 32'd6
deriv bpred_GSHARE_10_10_10_1_rv32gc rv32gc deriv bpred_GSHARE_10_10_10_1_rv32gc rv32gc
RAS_SIZE 32'd10 RAS_SIZE 32'd10
deriv bpred_GSHARE_10_16_10_1_rv32gc rv32gc
RAS_SIZE 32'd16
deriv bpred_GSHARE_10_16_6_1_rv32gc rv32gc deriv bpred_GSHARE_10_16_6_1_rv32gc rv32gc
BTB_SIZE 32'd6 BTB_SIZE 32'd6
@ -368,9 +365,6 @@ INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_10_10_0_rv32gc bpred_GSHARE_10_10_10_1_rv32gc deriv bpred_GSHARE_10_10_10_0_rv32gc bpred_GSHARE_10_10_10_1_rv32gc
INSTR_CLASS_PRED 0 INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_16_10_0_rv32gc bpred_GSHARE_10_16_10_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_16_6_0_rv32gc bpred_GSHARE_10_16_6_1_rv32gc deriv bpred_GSHARE_10_16_6_0_rv32gc bpred_GSHARE_10_16_6_1_rv32gc
INSTR_CLASS_PRED 0 INSTR_CLASS_PRED 0

View File

@ -281,7 +281,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
// fround // fround
fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE), fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE),
.XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), .XNaN(XNaNE), .XSNaN(XSNaNE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE),
.ZfaFRoundNX(ZfaFRoundNXE), .ZfaFRoundNX(ZfaFRoundNXE),
.FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE)); .FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE));

View File

@ -34,7 +34,6 @@ module fround import cvw::*; #(parameter cvw_t P) (
input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF) input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF)
input logic XNaN, // X is NaN input logic XNaN, // X is NaN
input logic XSNaN, // X is Signalling NaN input logic XSNaN, // X is Signalling NaN
input logic XZero, // X is Zero
input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
input logic [2:0] Frm, // rounding mode input logic [2:0] Frm, // rounding mode
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
@ -44,10 +43,10 @@ module fround import cvw::*; #(parameter cvw_t P) (
output logic FRoundNX // fround inexact output logic FRoundNX // fround inexact
); );
logic [P.NE-1:0] E, Xep1, EminusNf; logic [P.NE-1:0] E, Xep1;
logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
logic [P.FLEN-1:0] W, PackedW; logic [P.FLEN-1:0] W, PackedW;
logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact; logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf;
// Unbiased exponent // Unbiased exponent
assign E = Xe - P.BIAS[P.NE-1:0]; assign E = Xe - P.BIAS[P.NE-1:0];
@ -78,7 +77,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
assign Eeqm1 = ($signed(E) == -1); assign Eeqm1 = ($signed(E) == -1);
// Logic for nonnegative mask and rounding bits // Logic for nonnegative mask and rounding bits
assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; /// if E > Nf, this produces all 0s instead of all 1s. Hence exact handling is needed below.
assign Tmasknonneg = ~IMask >>> 1'b1; assign Tmasknonneg = ~IMask >>> 1'b1;
assign HotE = IMask & ~(IMask << 1'b1); assign HotE = IMask & ~(IMask << 1'b1);
assign HotEP1 = HotE >> 1'b1; assign HotEP1 = HotE >> 1'b1;
@ -100,7 +99,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
// if (X is NaN) // if (X is NaN)
// W = Canonical NaN // W = Canonical NaN
// Invalid = (X is signaling NaN) // Invalid = (X is signaling NaN)
// else if (E >= Nf or X is +/- 0) // else if (E >= Nf)
// W = X // is exact; this also handles infinity // W = X // is exact; this also handles infinity
// else // else
// RoundUp = RoundingLogic(Xs, L', R', T', rm) // Table 16.4 // RoundUp = RoundingLogic(Xs, L', R', T', rm) // Table 16.4
@ -117,11 +116,9 @@ module fround import cvw::*; #(parameter cvw_t P) (
/////////////////////////// ///////////////////////////
// Exact logic // Exact logic
/* verilator lint_off WIDTH */ // verilator lint_off WIDTHEXPAND
assign EminusNf = E - Nf; assign EgeNf = (E >= Nf) & Xe[P.NE-1]; // Check if E >= Nf. Also check that the unbiased exponent E is positive (MSB of biased Xe set) to avoid wraparound problems
/* verilator lint_on WIDTH */ // verilator lint_on WIDTHEXPAND
assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive
assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round
// Rounding logic: determine whether to round up in magnitude // Rounding logic: determine whether to round up in magnitude
always_comb begin always_comb begin
@ -136,6 +133,7 @@ module fround import cvw::*; #(parameter cvw_t P) (
// If result is not exact, select output in unpacked FLEN format initially // If result is not exact, select output in unpacked FLEN format initially
if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN
else if (EgeNf) W = {Xs, Xe, Xm[P.NF-1:0]}; // Exact, no rounding needed
else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1
if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1 if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1
else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0 else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0
@ -146,11 +144,10 @@ module fround import cvw::*; #(parameter cvw_t P) (
end end
end end
packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format. packoutput #(P) packoutput(W, Fmt, FRound); // pack and NaN-box based on selected format.
mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound);
// Flags // Flags
assign FRoundNV = XSNaN; // invalid if input is signaling NaN assign FRoundNV = XSNaN; // invalid if input is signaling NaN
assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction assign FRoundNX = ZfaFRoundNX & ~EgeNf & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction
endmodule endmodule

View File

@ -93,7 +93,7 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) (
// ZBC and ZBKCUnit // ZBC and ZBKCUnit
if (P.ZBC_SUPPORTED | P.ZBKC_SUPPORTED) begin: zbc if (P.ZBC_SUPPORTED | P.ZBKC_SUPPORTED) begin: zbc
zbc #(P.XLEN) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult); zbc #(P) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult);
end else assign ZBCResult = '0; end else assign ZBCResult = '0;
// ZBB Unit // ZBB Unit

View File

@ -28,23 +28,31 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
module zbc #(parameter WIDTH=32) ( module zbc import cvw::*; #(parameter cvw_t P) (
input logic [WIDTH-1:0] A, RevA, B, // Operands input logic [P.XLEN-1:0] A, RevA, B, // Operands
input logic [2:0] Funct3, // Indicates operation to perform input logic [2:0] Funct3, // Indicates operation to perform
output logic [WIDTH-1:0] ZBCResult); // ZBC result output logic [P.XLEN-1:0] ZBCResult); // ZBC result
logic [WIDTH-1:0] ClmulResult, RevClmulResult; logic [P.XLEN-1:0] ClmulResult, RevClmulResult;
logic [WIDTH-1:0] RevB; logic [P.XLEN-1:0] RevB;
logic [WIDTH-1:0] X, Y; logic [P.XLEN-1:0] X, Y;
bitreverse #(WIDTH) brB(B, RevB); bitreverse #(P.XLEN) brB(B, RevB);
mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X); // choose X = A for clmul, Rev(A) << 1 for clmulh, Rev(A) for clmulr
mux2 #(WIDTH) ymux(RevB, B, ~Funct3[1], Y); // unshifted Rev(A) source is only needed for clmulr in ZBC, not in ZBKC
if (P.ZBC_SUPPORTED)
mux3 #(P.XLEN) xmux({RevA[P.XLEN-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
else
mux2 #(P.XLEN) xmux(A, {RevA[P.XLEN-2:0], {1'b0}}, Funct3[1], X);
clmul #(WIDTH) clm(.X, .Y, .ClmulResult); // choose Y = B for clmul, Rev(B) for clmulh/clmulr
mux2 #(P.XLEN) ymux(B, RevB, Funct3[1], Y);
bitreverse #(WIDTH) brClmulResult(ClmulResult, RevClmulResult); // carry free multiplier
clmul #(P.XLEN) clm(.X, .Y, .ClmulResult);
mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult); // choose result = rev(X @ Y) for clmulh/clmulr
bitreverse #(P.XLEN) brClmulResult(ClmulResult, RevClmulResult);
mux2 #(P.XLEN) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
endmodule endmodule