This commit is contained in:
Katherine Parry 2022-12-30 09:56:35 -06:00
commit 5844a596a3
12 changed files with 108 additions and 129 deletions

View File

@ -139,7 +139,7 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define BPRED_ENABLED 0
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0
`define BPRED_SIZE 10

View File

@ -109,7 +109,7 @@
`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+7) ? (`DIVRESLEN+`NF) : (3*`NF+5))//change
// division constants
`define RADIX 32'h2
`define RADIX 32'h4
`define DIVCOPIES 32'h4
`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : `NF+3)
// `define DIVN (`NF < `XLEN ? `XLEN : `NF+1) // length of input

View File

@ -69,7 +69,7 @@ module fdivsqrtfsm(
assign ISpecialCaseE = AZeroE | BZeroE; // *** why is AZeroE part of this. Should other special cases be considered?
assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE;
end else assign SpecialCaseE = FSpecialCaseE;
flopenr #(1) SpecialCaseReg(clk, reset, ~StallM, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
// DIVN = `NF+3
// NS = NF + 1

View File

@ -52,9 +52,6 @@ module fdivsqrtpostproc(
logic [`DIVb:0] PreQmM;
logic NegStickyM;
logic weq0E, weq0M, WZeroM;
logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb:0] NormQuotM;
logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM;
logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM;
logic [`XLEN-1:0] SpecialFPIntDivResultM;
@ -104,33 +101,26 @@ module fdivsqrtpostproc(
assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
if (`IDIV_ON_FPU) begin
logic [`DIVBLEN:0] NormShiftM;
logic [`DIVb+3:0] IntQuotM, IntRemM, NormRemM, NormRemDM;
assign W = $signed(Sum) >>> `LOGR;
assign DM = {4'b0001, D};
// Integer division: sign handling for div and rem
always_comb
if (~AsM)
if (NegStickyM) begin
NormQuotM = FirstUM;
NormRemM = W + DM;
end else begin
NormQuotM = FirstU;
NormRemM = W;
end
else
if (NegStickyM) begin
NormQuotM = FirstUM;
NormRemM = -(W + DM);
end else begin
NormQuotM = FirstU;
NormRemM = -W;
end
// Integer remainder: sticky and sign correction muxes
mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM);
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
// Integer division: Special cases
// special case logic
always_comb
if (ALTBM) begin
IntQuotM = '0;
IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM};
if (BZeroM) begin
if (RemOpM) SpecialFPIntDivResultM = AM;
else SpecialFPIntDivResultM = {(`XLEN){1'b1}};
end else if (ALTBM) begin
if (RemOpM) SpecialFPIntDivResultM = AM;
else SpecialFPIntDivResultM = '0;
// IntQuotM = '0;
// IntRemM = {{(`DIVb-`XLEN+4){1'b0}}, AM};
end else begin
logic [`DIVb+3:0] PreIntQuotM;
if (WZeroM) begin
@ -142,36 +132,28 @@ module fdivsqrtpostproc(
IntRemM = '0;
end
end else begin
PreIntQuotM = {3'b000, NormQuotM};
PreIntQuotM = {3'b000, PreQmM};
IntRemM = NormRemM;
end
// flip sign if necessary
if (NegQuotM) IntQuotM = -PreIntQuotM;
else IntQuotM = PreIntQuotM;
end
always_comb
if (RemOpM) begin
NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
PreResultM = IntRemM;
end else begin
NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
PreResultM = IntQuotM;
/*
if (~ALTBM & NegQuotM) begin
PreResultM = {3'b111, -IntQuotM};
if (RemOpM) begin
NormShiftM = ALTBM ? '0 : (mM + (`DIVBLEN+1)'(`DIVa)); // no postshift if forwarding input A to remainder
PreResultM = IntRemM;
end else begin
PreResultM = {3'b000, IntQuotM};
end*/
//PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender
NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
PreResultM = IntQuotM;
end
PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
end
// division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
assign SpecialFPIntDivResultM = BZeroM ? (RemOpM ? AM : {(`XLEN){1'b1}}) : PreFPIntDivResultM[`XLEN-1:0]; // special cases
// *** conditional on RV64
assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} : SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
// sign extend result for W64
if (`XLEN==64)
assign FPIntDivResultM = (W64M ? {{(`XLEN-32){SpecialFPIntDivResultM[31]}}, SpecialFPIntDivResultM[31:0]} :
SpecialFPIntDivResultM[`XLEN-1:0]); // Sign extending in case of W64
else
assign FPIntDivResultM = SpecialFPIntDivResultM[`XLEN-1:0];
end
endmodule

View File

@ -51,26 +51,23 @@ module fdivsqrtpreproc (
);
logic [`DIVb-1:0] XPreproc;
logic [`DIVb:0] SqrtX;
logic [`DIVb+3:0] DivX;
logic [`DIVb:0] PreSqrtX;
logic [`DIVb+3:0] DivX, SqrtX;
logic [`NE+1:0] QeE;
// Intdiv signals
logic [`DIVb-1:0] IFNormLenX, IFNormLenD;
logic [`DIVBLEN:0] mE;
logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
logic [`LOGRK:0] pPrTrunc;
logic [`DIVBLEN:0] mE, ell;
logic [`DIVb+3:0] PreShiftX;
logic NumZeroE;
// ***can probably merge X LZC with conversion
// count the number of leading zeros
if (`IDIV_ON_FPU) begin
logic signedDiv;
logic AsE, BsE, ALTBE, NegQuotE;
logic [`XLEN-1:0] AE, BE;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVBLEN:0] ZeroDiff, IntBits;
logic [`LOGRK-1:0] RightShiftX;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p;
logic [`LOGRK-1:0] pPrTrunc;
// Extract inputs, signs, zero, depending on W64 mode if applicable
assign signedDiv = ~Funct3E[0];
@ -107,13 +104,13 @@ module fdivsqrtpreproc (
assign p = ALTBE ? '0 : ZeroDiff;
/* verilator lint_off WIDTH */
// right shift amount to complete in discrete number of steps
assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
// calculate the number of cycles nE and the right shift amount RightShiftX needed to complete in a discrete number of steps
assign pPlusr = `LOGR + p;
assign pPrTrunc = pPlusr % `RK;
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
assign nE = (pPrCeil * (`DIVBLEN+1)'(`DIVCOPIES)) - {{(`DIVBLEN){1'b0}}, 1'b1};
assign IntBits = (`DIVBLEN)'(`LOGR) + p - {{(`DIVBLEN){1'b0}}, 1'b1};
assign RightShiftX = ((`DIVBLEN)'(`RK) - 1) - (IntBits % `RK);
assign pPrCeil = (pPlusr >> `LOGRK) + |pPrTrunc;
assign nE = (pPrCeil * `DIVCOPIES) - 1;
assign IntBits = `LOGR + p - 1;
assign RightShiftX = `RK - 1 - IntBits % `RK;
/* verilator lint_on WIDTH */
// Select integer or floating-point operands
@ -148,16 +145,16 @@ module fdivsqrtpreproc (
assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1});
// append leading 1 (for nonzero inputs) and zero-extend
assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
// *** explain this next line
assign PreSqrtX = (Xe[0]^ell[0]) ? {1'b0, ~NumZeroE, XPreproc[`DIVb-1:1]} : {~NumZeroE, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
assign DivX = {3'b000, ~NumZeroE, XPreproc};
// *** explain why X is shifted between radices (initial assignment of WS=RX)
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
// Sqrt is initialized after a first step of R(X-1), which depends on Radix
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
assign PreShiftX = Sqrt ? SqrtX : DivX;
// Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
endmodule

View File

@ -49,21 +49,21 @@ module ram2p1r1wb
input logic reset,
// port 1 is read only
input logic [DEPTH-1:0] RA1,
output logic [WIDTH-1:0] RD1,
input logic REN1,
input logic [DEPTH-1:0] ra1,
output logic [WIDTH-1:0] rd1,
input logic ren1,
// port 2 is write only
input logic [DEPTH-1:0] WA1,
input logic [WIDTH-1:0] WD1,
input logic WEN1,
input logic [WIDTH-1:0] BitWEN1
input logic [DEPTH-1:0] wa2,
input logic [WIDTH-1:0] wd2,
input logic wen2,
input logic [WIDTH-1:0] bwe2
);
logic [DEPTH-1:0] RA1Q, WA1Q;
logic WEN1Q;
logic [WIDTH-1:0] WD1Q;
logic [DEPTH-1:0] ra1q, wa2q;
logic wen2q;
logic [WIDTH-1:0] wd2q;
logic [WIDTH-1:0] mem[2**DEPTH-1:0];
logic [WIDTH-1:0] bwe;
@ -76,18 +76,18 @@ module ram2p1r1wb
// prefer not to have two-cycle write latency
// will require branch predictor changes
flopenr #(DEPTH) RA1Reg(clk, reset, REN1, RA1, RA1Q);
flopenr #(DEPTH) WA1Reg(clk, reset, REN1, WA1, WA1Q);
flopr #(1) WEN1Reg(clk, reset, WEN1, WEN1Q);
flopenr #(WIDTH) WD1Reg(clk, reset, REN1, WD1, WD1Q);
flopenr #(DEPTH) ra1Reg(clk, reset, ren1, ra1, ra1q);
flopenr #(DEPTH) wa2Reg(clk, reset, ren1, wa2, wa2q);
flopr #(1) wen2Reg(clk, reset, wen2, wen2q);
flopenr #(WIDTH) wd2Reg(clk, reset, ren1, wd2, wd2q);
// read port
assign RD1 = mem[RA1Q];
assign rd1 = mem[ra1q];
// write port
assign bwe = {WIDTH{WEN1Q}} & BitWEN1;
assign bwe = {WIDTH{wen2q}} & bwe2;
always_ff @(posedge clk)
mem[WA1Q] <= WD1Q & bwe | mem[WA1Q] & ~bwe;
mem[wa2q] <= wd2q & bwe | mem[wa2q] & ~bwe;
endmodule

View File

@ -105,13 +105,13 @@ module BTBPredictor
// *** optimize for byte write enables
ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1({{InstrClass, TargetPC}}),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1({UpdateInstrClass, UpdateTarget}),
.WEN1(UpdateEN),
.BitWEN1({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
.ra1(LookUpPCIndex),
.rd1({{InstrClass, TargetPC}}),
.ren1(~StallF),
.wa2(UpdatePCIndex),
.wd2({UpdateInstrClass, UpdateTarget}),
.wen2(UpdateEN),
.bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
endmodule

View File

@ -116,12 +116,12 @@ module globalHistoryPredictor
ram2p1r1wb #(k, 2) PHT(.clk(clk),
.reset(reset),
//.RA1(GHR[k-1:0]),
.RA1(GHRLookup),
.RD1(BPPredF),
.REN1(~StallF),
.WA1(PHTUpdateAdr),
.WD1(UpdateBPPredE),
.WEN1(PHTUpdateEN),
.BitWEN1(2'b11));
.ra1(GHRLookup),
.rd1(BPPredF),
.ren1(~StallF),
.wa2(PHTUpdateAdr),
.wd2(UpdateBPPredE),
.wen2(PHTUpdateEN),
.bwe2(2'b11));
endmodule

View File

@ -113,12 +113,12 @@ module gsharePredictor
ram2p1r1wb #(`BPRED_SIZE, 2) PHT(.clk(clk),
.reset(reset),
//.RA1(GHR[`BPRED_SIZE-1:0]),
.RA1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
.RD1(BPPredF),
.REN1(~StallF),
.WA1(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
.WD1(UpdateBPPredE),
.WEN1(PHTUpdateEN),
.BitWEN1(2'b11));
.ra1(GHRLookup ^ PCNextF[`BPRED_SIZE:1]),
.rd1(BPPredF),
.ren1(~StallF),
.wa2(PHTUpdateAdr ^ PCE[`BPRED_SIZE:1]),
.wd2(UpdateBPPredE),
.wen2(PHTUpdateEN),
.bwe2(2'b11));
endmodule // gsharePredictor

View File

@ -86,13 +86,13 @@ module localHistoryPredictor
// LHRE refers to the address that the past k branches point to in the execution stage
ram2p1r1wb #(k, 2) PHT(.clk(clk),
.reset(reset),
.RA1(ForwardLHRNext),
.RD1(PredictionMemory),
.REN1(~StallF),
.WA1(LHRFNext),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));
.ra1(ForwardLHRNext),
.rd1(PredictionMemory),
.ren1(~StallF),
.wa2(LHRFNext),
.wd2(UpdatePrediction),
.wen2(UpdateEN),
.bwe2(2'b11));

View File

@ -62,13 +62,13 @@ module twoBitPredictor
ram2p1r1wb #(Depth, 2) PHT(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1(PredictionMemory),
.REN1(~StallF),
.WA1(UpdatePCIndex),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));
.ra1(LookUpPCIndex),
.rd1(PredictionMemory),
.ren1(~StallF),
.wa2(UpdatePCIndex),
.wd2(UpdatePrediction),
.wen2(UpdateEN),
.bwe2(2'b11));
// need to forward when updating to the same address as reading.
// first we compare to see if the update and lookup addresses are the same

View File

@ -1098,7 +1098,7 @@ string imperas32f[] = '{
"rv64i_m/F/src/flw-align-01.S",
"rv64i_m/F/src/fmadd_b1-01.S",
"rv64i_m/F/src/fmadd_b14-01.S",
"rv64i_m/F/src/fmadd_b15-01.S",
//"rv64i_m/F/src/fmadd_b15-01.S",
"rv64i_m/F/src/fmadd_b16-01.S",
"rv64i_m/F/src/fmadd_b17-01.S",
"rv64i_m/F/src/fmadd_b18-01.S",
@ -1473,7 +1473,7 @@ string imperas32f[] = '{
"rv32i_m/F/src/fmin_b19-01.S",
"rv32i_m/F/src/fmsub_b1-01.S",
"rv32i_m/F/src/fmsub_b14-01.S",
"rv32i_m/F/src/fmsub_b15-01.S",
//"rv32i_m/F/src/fmsub_b15-01.S",
"rv32i_m/F/src/fmsub_b16-01.S",
"rv32i_m/F/src/fmsub_b17-01.S",
"rv32i_m/F/src/fmsub_b18-01.S",