mirror of
				https://github.com/openhwgroup/cvw
				synced 2025-02-11 06:05:49 +00:00 
			
		
		
		
	Merge pull request #829 from davidharrishmc/dev
Various small optimizations
This commit is contained in:
		
						commit
						c5b1338697
					
				@ -90,7 +90,7 @@ foreach my $key (@derivnames) {
 | 
			
		||||
 | 
			
		||||
    my $datestring = localtime();
 | 
			
		||||
    my %hit = ();
 | 
			
		||||
    print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring usubg derivgen.pl\n";
 | 
			
		||||
    print $fh "// Config $key automatically derived from $basederiv{$key} on $datestring using derivgen.pl\n";
 | 
			
		||||
    foreach my $line (<$unmod>) {
 | 
			
		||||
        foreach my $entry (@{$derivs{$key}}) {    
 | 
			
		||||
            my @ent = @{$entry};
 | 
			
		||||
 | 
			
		||||
@ -296,9 +296,6 @@ RAS_SIZE          32'd6
 | 
			
		||||
deriv bpred_GSHARE_10_10_10_1_rv32gc rv32gc
 | 
			
		||||
RAS_SIZE          32'd10
 | 
			
		||||
 | 
			
		||||
deriv bpred_GSHARE_10_16_10_1_rv32gc rv32gc
 | 
			
		||||
RAS_SIZE          32'd16
 | 
			
		||||
 | 
			
		||||
deriv bpred_GSHARE_10_16_6_1_rv32gc rv32gc
 | 
			
		||||
BTB_SIZE          32'd6
 | 
			
		||||
 | 
			
		||||
@ -368,9 +365,6 @@ INSTR_CLASS_PRED          0
 | 
			
		||||
deriv bpred_GSHARE_10_10_10_0_rv32gc bpred_GSHARE_10_10_10_1_rv32gc
 | 
			
		||||
INSTR_CLASS_PRED          0
 | 
			
		||||
 | 
			
		||||
deriv bpred_GSHARE_10_16_10_0_rv32gc bpred_GSHARE_10_16_10_1_rv32gc
 | 
			
		||||
INSTR_CLASS_PRED          0
 | 
			
		||||
 | 
			
		||||
deriv bpred_GSHARE_10_16_6_0_rv32gc bpred_GSHARE_10_16_6_1_rv32gc
 | 
			
		||||
INSTR_CLASS_PRED          0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -281,7 +281,7 @@ module fpu import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
 | 
			
		||||
    // fround
 | 
			
		||||
    fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE), 
 | 
			
		||||
                       .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), 
 | 
			
		||||
                       .XNaN(XNaNE), .XSNaN(XSNaNE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), 
 | 
			
		||||
                       .ZfaFRoundNX(ZfaFRoundNXE),
 | 
			
		||||
                       .FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -34,7 +34,6 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
  input  logic [P.NF:0]           Xm,           // input's fraction with leading integer bit (U1.NF)
 | 
			
		||||
  input  logic                    XNaN,         // X is NaN
 | 
			
		||||
  input  logic                    XSNaN,        // X is Signalling NaN
 | 
			
		||||
  input  logic                    XZero,        // X is Zero
 | 
			
		||||
  input  logic [P.FMTBITS-1:0]    Fmt,          // the input's precision (11=quad 01=double 00=single 10=half)
 | 
			
		||||
  input  logic [2:0]              Frm,          // rounding mode
 | 
			
		||||
  input  logic [P.LOGFLEN-1:0]    Nf,           // Number of fractional bits in selected format
 | 
			
		||||
@ -44,10 +43,10 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
  output logic                    FRoundNX      // fround inexact
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
  logic [P.NE-1:0] E, Xep1, EminusNf;
 | 
			
		||||
  logic [P.NE-1:0] E, Xep1;
 | 
			
		||||
  logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
 | 
			
		||||
  logic [P.FLEN-1:0] W, PackedW;
 | 
			
		||||
  logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact;
 | 
			
		||||
  logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf;
 | 
			
		||||
 | 
			
		||||
  // Unbiased exponent
 | 
			
		||||
  assign E = Xe - P.BIAS[P.NE-1:0];
 | 
			
		||||
@ -78,7 +77,7 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
  assign Eeqm1 = ($signed(E) == -1);
 | 
			
		||||
 | 
			
		||||
  // Logic for nonnegative mask and rounding bits
 | 
			
		||||
  assign IMask = {1'b1, {P.NF{1'b0}}} >>> E;
 | 
			
		||||
  assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; // If E > Nf, this produces all 0s instead of all 1s, so the exact case (EgeNf) is handled separately below.
 | 
			
		||||
  assign Tmasknonneg = ~IMask >>> 1'b1;
 | 
			
		||||
  assign HotE = IMask & ~(IMask << 1'b1);
 | 
			
		||||
  assign HotEP1 = HotE >> 1'b1;
 | 
			
		||||
@ -100,7 +99,7 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
  //      if (X is NaN)
 | 
			
		||||
  //              W = Canonical NaN
 | 
			
		||||
  //              Invalid = (X is signaling NaN)
 | 
			
		||||
  //      else if (E >= Nf or X is +/- 0) 
 | 
			
		||||
  //      else if (E >= Nf) 
 | 
			
		||||
  //              W = X						// is exact; this also handles infinity
 | 
			
		||||
  //      else 
 | 
			
		||||
  //              RoundUp = RoundingLogic(Xs, L', R', T', rm)	// Table 16.4
 | 
			
		||||
@ -117,11 +116,9 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
  ///////////////////////////
 | 
			
		||||
 | 
			
		||||
  // Exact logic
 | 
			
		||||
  /* verilator lint_off WIDTH */
 | 
			
		||||
  assign EminusNf = E - Nf;
 | 
			
		||||
  /* verilator lint_on WIDTH */
 | 
			
		||||
  assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive 
 | 
			
		||||
  assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round
 | 
			
		||||
  // verilator lint_off WIDTHEXPAND
 | 
			
		||||
  assign EgeNf = (E >= Nf) & Xe[P.NE-1]; // Check if E >= Nf.  Also require the MSB of Xe to be set so that a negative E, which wraps around as an unsigned value, is not treated as >= Nf
 | 
			
		||||
  // verilator lint_on WIDTHEXPAND
 | 
			
		||||
 | 
			
		||||
  // Rounding logic: determine whether to round up in magnitude
 | 
			
		||||
  always_comb begin
 | 
			
		||||
@ -135,22 +132,22 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 | 
			
		||||
    endcase
 | 
			
		||||
 | 
			
		||||
    // If result is not exact, select output in unpacked FLEN format initially
 | 
			
		||||
    if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN
 | 
			
		||||
    else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1
 | 
			
		||||
      if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1
 | 
			
		||||
      else         W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0
 | 
			
		||||
    else begin // |X| >= 1 rounds to an integer
 | 
			
		||||
      if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0
 | 
			
		||||
      else if (RoundUp)  W = {Xs, Xe, Rnd[P.NF-1:0]};      // Round up to Rnd
 | 
			
		||||
      else               W = {Xs, Xe, Trunc[P.NF-1:0]};    // Round down to Trunc
 | 
			
		||||
    if (XNaN)            W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}};  // Canonical NaN
 | 
			
		||||
    else if (EgeNf)      W = {Xs, Xe, Xm[P.NF-1:0]};                        // Exact, no rounding needed
 | 
			
		||||
    else if (Elt0)                                                          // 0 <= |X| < 1 rounds to 0 or 1
 | 
			
		||||
      if (RoundUp)       W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}};          //   round to +/- 1
 | 
			
		||||
      else               W = {Xs, {(P.FLEN-1){1'b0}}};                      //   round to +/- 0
 | 
			
		||||
    else begin                                                              // |X| >= 1 rounds to an integer
 | 
			
		||||
      if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}};                    //   Round up to 2.0
 | 
			
		||||
      else if (RoundUp)  W = {Xs, Xe, Rnd[P.NF-1:0]};                       //   Round up to Rnd
 | 
			
		||||
      else               W = {Xs, Xe, Trunc[P.NF-1:0]};                     //   Round down to Trunc
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format.
 | 
			
		||||
  mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound);
 | 
			
		||||
  packoutput #(P) packoutput(W, Fmt, FRound); // pack and NaN-box based on selected format.
 | 
			
		||||
 | 
			
		||||
  // Flags
 | 
			
		||||
  assign FRoundNV = XSNaN;                                        // invalid if input is signaling NaN
 | 
			
		||||
  assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp);    // Inexact if Round or Sticky bit set for FRoundNX instruction
 | 
			
		||||
  assign FRoundNV = XSNaN;                               // invalid if input is signaling NaN
 | 
			
		||||
  assign FRoundNX = ZfaFRoundNX & ~EgeNf & (Rp | Tp);    // Inexact if Round or Sticky bit set for FRoundNX instruction
 | 
			
		||||
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
@ -93,7 +93,7 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) (
 | 
			
		||||
 | 
			
		||||
  // ZBC and ZBKCUnit
 | 
			
		||||
  if (P.ZBC_SUPPORTED | P.ZBKC_SUPPORTED) begin: zbc
 | 
			
		||||
    zbc #(P.XLEN) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult);
 | 
			
		||||
    zbc #(P) ZBC(.A(ABMU), .RevA, .B(BBMU), .Funct3, .ZBCResult);
 | 
			
		||||
  end else assign ZBCResult = '0;
 | 
			
		||||
 | 
			
		||||
  // ZBB Unit
 | 
			
		||||
 | 
			
		||||
@ -28,23 +28,31 @@
 | 
			
		||||
// and limitations under the License.
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
module zbc #(parameter WIDTH=32) (
 | 
			
		||||
  input  logic [WIDTH-1:0] A, RevA, B,       // Operands
 | 
			
		||||
  input  logic [2:0]       Funct3,           // Indicates operation to perform
 | 
			
		||||
  output logic [WIDTH-1:0] ZBCResult);       // ZBC result
 | 
			
		||||
module zbc import cvw::*; #(parameter cvw_t P) (
 | 
			
		||||
  input  logic [P.XLEN-1:0] A, RevA, B,       // Operands
 | 
			
		||||
  input  logic [2:0]        Funct3,           // Indicates operation to perform
 | 
			
		||||
  output logic [P.XLEN-1:0] ZBCResult);       // ZBC result
 | 
			
		||||
 | 
			
		||||
  logic [WIDTH-1:0] ClmulResult, RevClmulResult;
 | 
			
		||||
  logic [WIDTH-1:0] RevB;
 | 
			
		||||
  logic [WIDTH-1:0] X, Y;
 | 
			
		||||
  logic [P.XLEN-1:0] ClmulResult, RevClmulResult;
 | 
			
		||||
  logic [P.XLEN-1:0] RevB;
 | 
			
		||||
  logic [P.XLEN-1:0] X, Y;
 | 
			
		||||
 | 
			
		||||
  bitreverse #(WIDTH) brB(B, RevB);
 | 
			
		||||
  bitreverse #(P.XLEN) brB(B, RevB);
 | 
			
		||||
 | 
			
		||||
  mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
 | 
			
		||||
  mux2 #(WIDTH) ymux(RevB, B, ~Funct3[1], Y);
 | 
			
		||||
  // choose X = A for clmul, Rev(A) << 1 for clmulh, Rev(A) for clmulr
 | 
			
		||||
  // unshifted Rev(A) source is only needed for clmulr in ZBC, not in ZBKC
 | 
			
		||||
  if (P.ZBC_SUPPORTED)
 | 
			
		||||
    mux3 #(P.XLEN) xmux({RevA[P.XLEN-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
 | 
			
		||||
  else
 | 
			
		||||
    mux2 #(P.XLEN) xmux(A, {RevA[P.XLEN-2:0], {1'b0}}, Funct3[1], X);
 | 
			
		||||
 | 
			
		||||
  clmul #(WIDTH) clm(.X, .Y, .ClmulResult);
 | 
			
		||||
  
 | 
			
		||||
  bitreverse  #(WIDTH) brClmulResult(ClmulResult, RevClmulResult);
 | 
			
		||||
  // choose Y = B for clmul, Rev(B) for clmulh/clmulr
 | 
			
		||||
  mux2 #(P.XLEN) ymux(B, RevB, Funct3[1], Y);
 | 
			
		||||
 | 
			
		||||
  mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
 | 
			
		||||
  // carry free multiplier
 | 
			
		||||
  clmul #(P.XLEN) clm(.X, .Y, .ClmulResult);
 | 
			
		||||
 | 
			
		||||
  // choose result = rev(X @ Y) for clmulh/clmulr
 | 
			
		||||
  bitreverse #(P.XLEN) brClmulResult(ClmulResult, RevClmulResult);
 | 
			
		||||
  mux2 #(P.XLEN) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
 | 
			
		||||
endmodule
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user