Merge pull request #80 from ross144/main

Branch predictor acuracy fixes caused by last two weeks optimazations"
This commit is contained in:
David Harris 2023-02-15 09:39:26 -08:00 committed by GitHub
commit abf3fbbebf
11 changed files with 84 additions and 71 deletions

View File

@ -63,6 +63,8 @@ module controller(
output logic RegWriteM, // Instruction writes a register (needed for Hazard unit)
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
output logic InstrValidD, InstrValidE, InstrValidM, // Instruction is valid
output logic BranchD, BranchE,
output logic JumpD,
output logic FWriteIntM, // FPU controller writes integer register file
// Writeback stage control signals
@ -85,8 +87,6 @@ module controller(
logic RegWriteD, RegWriteE; // RegWrite (register will be written)
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select which result to write back to register file
logic [1:0] MemRWD, MemRWE; // Store (write to memory)
logic JumpD; // Jump instruction
logic BranchD, BranchE; // Branch instruction
logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3)
logic [2:0] ALUControlD; // Determines ALU operation
logic ALUSrcAD, ALUSrcBD; // ALU inputs

View File

@ -55,6 +55,8 @@ module ieu (
input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp)
output logic InvalidateICacheM, FlushDCacheM, // Invalidate I$, flush D$
output logic InstrValidD, InstrValidE, InstrValidM,// Instruction is valid
output logic BranchD, BranchE,
output logic JumpD, JumpE,
// Writeback stage signals
input logic [`XLEN-1:0] FIntDivResultW, // Integer divide result from FPU fdivsqrt)
input logic [`XLEN-1:0] CSRReadValW, // CSR read value,
@ -87,7 +89,6 @@ module ieu (
logic [1:0] ForwardAE, ForwardBE; // Select signals for forwarding multiplexers
logic RegWriteM, RegWriteW; // Register will be written in Memory, Writeback stages
logic MemReadE, CSRReadE; // Load, CSRRead instruction
logic JumpE; // Jump instruction
logic BranchSignedE; // Branch does signed comparison on operands
logic MDUE; // Multiply/divide instruction
@ -95,7 +96,7 @@ module ieu (
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
.IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
.PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE,
.Funct3E, .IntDivE, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.Funct3E, .IntDivE, .MDUE, .W64E, .BranchD, .BranchE, .JumpD, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .InstrValidE, .InstrValidD, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .IntDivW, .ResultSrcW, .CSRWriteFenceM, .StoreStallD);

View File

@ -34,7 +34,9 @@ module RASPredictor #(parameter int StackSize = 16 )(
input logic reset,
input logic StallF, StallD, StallE, StallM, FlushD, FlushE, FlushM,
input logic [3:0] WrongPredInstrClassD, // Prediction class is wrong
input logic [3:0] InstrClassD, InstrClassE, PredInstrClassF, // Instr class
input logic [3:0] InstrClassD,
input logic [3:0] InstrClassE, // Instr class
input logic [3:0] PredInstrClassF,
input logic [`XLEN-1:0] PCLinkE, // PC of instruction after a jal
output logic [`XLEN-1:0] RASPCF // Top of the stack
);
@ -93,6 +95,3 @@ module RASPredictor #(parameter int StackSize = 16 )(
endmodule

View File

@ -52,6 +52,8 @@ module bpred (
// Branch and jump outcome
input logic InstrValidD, InstrValidE,
input logic BranchD, BranchE,
input logic JumpD, JumpE,
input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
@ -70,11 +72,12 @@ module bpred (
logic PredValidF;
logic [1:0] DirPredictionF;
logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD;
logic [3:0] BTBPredInstrClassF, PredInstrClassF, PredInstrClassD, PredInstrClassE;
logic [`XLEN-1:0] PredPCF, RASPCF;
logic PredictionPCWrongE;
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic [3:0] InstrClassF, InstrClassD, InstrClassE, InstrClassW;
logic [3:0] InstrClassD;
logic [3:0] InstrClassE;
logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE;
logic SelBPPredF;
@ -83,9 +86,9 @@ module bpred (
logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic JumpOrTakenBranchE;
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic JumpOrTakenBranchE;
logic [`XLEN-1:0] PredPCD, PredPCE, RASPCD, RASPCE;
@ -104,17 +107,17 @@ module bpred (
end else if (`BPRED_TYPE == "BPSPECULATIVEGLOBAL") begin:Predictor
speculativeglobalhistory #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.DirPredictionF, .DirPredictionWrongE,
.PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE);
.PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE);
end else if (`BPRED_TYPE == "BPGSHARE") begin:Predictor
gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
.PCNextF, .PCE, .DirPredictionF, .DirPredictionWrongE,
gshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .PCSrcE);
end else if (`BPRED_TYPE == "BPSPECULATIVEGSHARE") begin:Predictor
speculativegshare #(`BPRED_SIZE) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .DirPredictionF, .DirPredictionWrongE,
.PredInstrClassF, .InstrClassD, .InstrClassE, .WrongPredInstrClassD, .PCSrcE);
.PredInstrClassF, .InstrClassD, .InstrClassE, .InstrClassM, .WrongPredInstrClassD, .PCSrcE);
end else if (`BPRED_TYPE == "BPLOCALPAg") begin:Predictor
// *** Fix me
@ -148,7 +151,8 @@ module bpred (
if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode
logic [4:0] CompressedOpcF;
logic [3:0] InstrClassF;
logic cjal, cj, cjr, cjalr;
logic cjal, cj, cjr, cjalr, CJumpF, CBranchF;
logic JumpF, BranchF;
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
@ -156,30 +160,27 @@ module bpred (
assign cj = CompressedOpcF == 5'h0d;
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = cjal | cj | cjr | cjalr;
assign CBranchF = CompressedOpcF[4:1] == 4'h7;
assign JumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
assign BranchF = PostSpillInstrRawF[6:0] == 7'h63;
assign InstrClassF[0] = PostSpillInstrRawF[6:0] == 7'h63 |
(`C_SUPPORTED & CompressedOpcF[4:1] == 4'h7);
assign InstrClassF[1] = (PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) != 5'h01 & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump register, but not return
(PostSpillInstrRawF[6:0] == 7'h6F & (PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01) | // jump, RD != x1 or x5
(`C_SUPPORTED & (cj | (cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) != 5'h01)) ));
assign InstrClassF[2] = PostSpillInstrRawF[6:0] == 7'h67 & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 | // return must return to ra or r5
assign InstrClassF[0] = BranchF | (`C_SUPPORTED & CBranchF);
assign InstrClassF[1] = JumpF | (`C_SUPPORTED & (cjal | cj | cj | cjalr));
assign InstrClassF[2] = (JumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // return must return to ra or r5
(`C_SUPPORTED & (cjalr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign InstrClassF[3] = ((PostSpillInstrRawF[6:0] & 7'h77) == 7'h67 & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5
assign InstrClassF[3] = (JumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // jal(r) must link to ra or x5
(`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
assign PredInstrClassF = InstrClassF;
assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1]) |
PredInstrClassF[2] |
PredInstrClassF[1] |
PredInstrClassF[3];
PredInstrClassF[1];
end else begin
assign PredInstrClassF = BTBPredInstrClassF;
assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & PredValidF) |
PredInstrClassF[2] |
(PredInstrClassF[1] & PredValidF) |
(PredInstrClassF[3] & PredValidF);
PredInstrClassF[1] & PredValidF;
end
// Part 3 RAS
@ -189,15 +190,13 @@ module bpred (
assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF;
assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return
(InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
assign InstrClassD[0] = BranchD;
assign InstrClassD[1] = JumpD ;
assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5
assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE);
flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM);
flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW);
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// branch predictor
@ -208,6 +207,7 @@ module bpred (
// pipeline the class
flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD);
flopenrc #(1) WrongInstrClassRegE(clk, reset, FlushE, ~StallE, AnyWrongPredInstrClassD, AnyWrongPredInstrClassE);
flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE);
// Check the prediction
// if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
@ -218,11 +218,12 @@ module bpred (
assign PredictionPCWrongE = PCCorrectE != PCD;
// branch class prediction wrong.
assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD;
assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD[3:0];
assign AnyWrongPredInstrClassD = |WrongPredInstrClassD;
// branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions.
assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD;
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE));
//assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; // this does not work for cubic benchmark
// Output the predicted PC or corrected PC on miss-predict.
// Selects the BP or PC+2/4.
@ -247,10 +248,10 @@ module bpred (
// could be wrong or the fall through address selected for branch predict not taken.
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies.
assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] | InstrClassE[3]) & PCSrcE;
assign BTBPredPCWrongE = (PredPCE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & InstrClassE[2] & PCSrcE;
assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1] | InstrClassE[3];
assign JumpOrTakenBranchE = (InstrClassE[0] & PCSrcE) | InstrClassE[1];
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);

View File

@ -53,7 +53,6 @@ module btb #(parameter int Depth = 10 ) (
logic [`XLEN+4:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredictionF;
logic [`XLEN-1:0] PredPCD;
logic [3:0] PredInstrClassD; // *** copy of reg outside module
logic UpdateEn;
logic TablePredValidF, PredValidD;
@ -96,8 +95,6 @@ module btb #(parameter int Depth = 10 ) (
if(~StallF | reset) TablePredValidF = ValidBits[PCNextFIndex];
end
//assign PredValidF = MatchXF ? 1'b1 : TablePredValidF;
assign UpdateEn = |InstrClassE | AnyWrongPredInstrClassE;
// An optimization may be using a PC relative address.

View File

@ -31,12 +31,12 @@
module gshare #(parameter k = 10) (
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM,
input logic FlushD, FlushE, FlushM,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCE,
input logic [`XLEN-1:0] PCNextF, PCM,
input logic BranchInstrE, BranchInstrM, PCSrcE
);
@ -44,20 +44,20 @@ module gshare #(parameter k = 10) (
logic [1:0] DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE, NewDirPredictionM;
logic [k-1:0] GHRF, GHRD, GHRE, GHR;
logic [k-1:0] GHRF, GHRD, GHRE, GHRM, GHR;
logic [k-1:0] GHRNext;
logic PCSrcM;
assign IndexNextF = GHR & {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
assign IndexE = GHRE & {PCE[k+1] ^ PCE[1], PCE[k:2]};
assign IndexNextF = GHR ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
assign IndexE = GHRM ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF), .ce2(~StallM & ~FlushM),
.ra1(IndexNextF),
.rd1(DirPredictionF),
.wa2(IndexE),
.wd2(NewDirPredictionE),
.we2(BranchInstrE & ~StallM & ~FlushM),
.wd2(NewDirPredictionM),
.we2(BranchInstrM & ~StallW & ~FlushW),
.bwe2(1'b1));
flopenrc #(2) PredictionRegD(clk, reset, FlushD, ~StallD, DirPredictionF, DirPredictionD);
@ -75,6 +75,7 @@ module gshare #(parameter k = 10) (
flopenrc #(k) GHRFReg(clk, reset, FlushD, ~StallF, GHR, GHRF);
flopenrc #(k) GHRDReg(clk, reset, FlushD, ~StallD, GHRF, GHRD);
flopenrc #(k) GHREReg(clk, reset, FlushE, ~StallE, GHRD, GHRE);
flopenrc #(k) GHRMReg(clk, reset, FlushM, ~StallM, GHRE, GHRM);
endmodule

View File

@ -1,5 +1,5 @@
///////////////////////////////////////////
// speculativeglobalhistory.sv
// gsharePredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
@ -36,7 +36,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE,
// update
input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE,
input logic [3:0] PredInstrClassF,
input logic [3:0] InstrClassD, InstrClassE, InstrClassM,
input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE
);
@ -47,9 +48,9 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE;
logic [k-1:0] GHRF, GHRD, GHRE;
logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
logic GHRLastF;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM;
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
@ -57,8 +58,8 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
assign IndexNextF = GHRNextF;
assign IndexF = GHRF;
assign IndexD = GHRD[k-1:0];
assign IndexE = GHRE[k-1:0];
assign IndexD = GHRD;
assign IndexE = GHRE;
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallM & ~FlushM),
@ -111,15 +112,18 @@ module speculativeglobalhistory #(parameter int k = 10 ) (
// If it is wrong and branch does exist then shift right and insert the prediction.
// If the branch does not exist then shift left and use GHRLastF to restore the LSB.
logic [k-1:0] GHRClassWrong;
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong);
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong);
// As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE.
mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD);
flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD);
mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE);
flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE);
assign GHRNextM = FlushM ? GHRM : GHRE;
flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM);
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0];

View File

@ -37,7 +37,8 @@ module speculativegshare #(parameter int k = 10 ) (
output logic DirPredictionWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE,
input logic [3:0] PredInstrClassF, InstrClassD, InstrClassE,
input logic [3:0] PredInstrClassF,
input logic [3:0] InstrClassD, InstrClassE, InstrClassM,
input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE
);
@ -48,9 +49,9 @@ module speculativegshare #(parameter int k = 10 ) (
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionE;
logic [k-1:0] GHRF, GHRD, GHRE;
logic [k-1:0] GHRF, GHRD, GHRE, GHRM;
logic GHRLastF;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE;
logic [k-1:0] GHRNextF, GHRNextD, GHRNextE, GHRNextM;
logic [k-1:0] IndexNextF, IndexF, IndexD, IndexE;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
@ -112,15 +113,18 @@ module speculativegshare #(parameter int k = 10 ) (
// If it is wrong and branch does exist then shift right and insert the prediction.
// If the branch does not exist then shift left and use GHRLastF to restore the LSB.
logic [k-1:0] GHRClassWrong;
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, InstrClassD[0], GHRClassWrong);
mux2 #(k) GHRClassWrongMux({DirPredictionD[1], GHRF[k-1:1]}, {GHRF[k-2:0], GHRLastF}, ~InstrClassD[0], GHRClassWrong);
// As with GHRF FlushD and wrong direction prediction flushes the pipeline and restores to GHRNextE.
mux3 #(k) GHRDMux(GHRF, GHRClassWrong, GHRNextE, {FlushDOrDirWrong, WrongPredInstrClassD[0]}, GHRNextD);
flopenr #(k) GHRDReg(clk, reset, ~StallD | FlushDOrDirWrong, GHRNextD, GHRD);
mux3 #(k) GHREMux(GHRD, GHRE, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
mux3 #(k) GHREMux(GHRD, GHRNextM, {PCSrcE, GHRD[k-2:0]}, {InstrClassE[0] & ~FlushM, FlushE}, GHRNextE);
flopenr #(k) GHREReg(clk, reset, ((InstrClassE[0] & ~FlushM) & ~StallE) | FlushE, GHRNextE, GHRE);
flopenr #(k) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, GHRE);
assign GHRNextM = FlushM ? GHRM : GHRE;
flopenr #(k) GHRMReg(clk, reset, (InstrClassM[0] & ~StallM) | FlushM, GHRNextM, GHRM);
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & InstrClassE[0];

View File

@ -36,6 +36,8 @@ module ifu (
input logic InvalidateICacheM, // Clears all instruction cache valid bits
input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE)
input logic InstrValidD, InstrValidE, InstrValidM,
input logic BranchD, BranchE,
input logic JumpD, JumpE,
// Bus interface
output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU
input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU
@ -323,7 +325,8 @@ module ifu (
if (`BPRED_SUPPORTED) begin : bpred
bpred bpred(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW,
.FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE,
.FlushD, .FlushE, .FlushM, .FlushW, .InstrValidD, .InstrValidE,
.BranchD, .BranchE, .JumpD, .JumpE,
.InstrD, .PCNextF, .PCPlus2or4F, .PCNext1F, .PCE, .PCM, .PCSrcE, .IEUAdrE, .PCF, .NextValidPCE,
.PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .PostSpillInstrRawF, .JumpOrTakenBranchM, .BPPredWrongM,
.DirPredictionWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .PredictionInstrClassWrongM);

View File

@ -162,11 +162,13 @@ module wallypipelinedcore (
logic FCvtIntE;
logic CommittedF;
logic JumpOrTakenBranchM;
logic BranchD, BranchE, JumpD, JumpE;
// instruction fetch unit: PC, branch prediction, instruction cache
ifu ifu(.clk, .reset,
.StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.InstrValidM, .InstrValidE, .InstrValidD,
.BranchD, .BranchE, .JumpD, .JumpE,
// Fetch
.HRDATA, .PCFSpill, .IFUHADDR, .PCNext2F,
.IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE,
@ -200,6 +202,7 @@ module wallypipelinedcore (
.Funct3M, // size and signedness to LSU
.SrcAM, // to privilege and fpu
.RdE, .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
.BranchD, .BranchE, .JumpD, .JumpE,
// Writeback stage
.CSRReadValW, .MDUResultW, .FIntDivResultW, .RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM, .InstrValidE, .InstrValidD, .FCvtIntResW, .FCvtIntW,

View File

@ -50,9 +50,9 @@ string tvpaths[] = '{
string embench[] = '{
`EMBENCH,
"bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches
"bd_speedopt_speed/src/aha-mont64/aha-mont64",
"bd_speedopt_speed/src/crc32/crc32",
"bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches
"bd_speedopt_speed/src/edn/edn",
"bd_speedopt_speed/src/huffbench/huffbench",
"bd_speedopt_speed/src/matmult-int/matmult-int",