Branch predictor cleanup.

This commit is contained in:
Ross Thompson 2023-02-26 21:28:36 -06:00
parent 3804626166
commit 447f6b1443
3 changed files with 72 additions and 79 deletions

View File

@ -70,33 +70,32 @@ module bpred (
output logic PredictionInstrClassWrongM // Class prediction is wrong
);
logic [1:0] BPDirPredF;
logic [1:0] BPDirPredF;
logic [`XLEN-1:0] BTAF, RASPCF;
logic PredictionPCWrongE;
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic BPDirPredWrongE;
logic [`XLEN-1:0] BTAF, RASPCF;
logic PredictionPCWrongE;
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic BPDirPredWrongE;
logic BPPCSrcF;
logic [`XLEN-1:0] BPPredPCF;
logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
logic BPPCSrcF;
logic [`XLEN-1:0] BPPCF;
logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic BTBTargetWrongE;
logic RASTargetWrongE;
logic [`XLEN-1:0] BTAD;
logic [`XLEN-1:0] BTAD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
logic ReturnD, CallD;
logic ReturnE, CallE;
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic WrongBPReturnD;
logic [`XLEN-1:0] PCW, IEUAdrW;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
logic ReturnD, CallD;
logic ReturnE, CallE;
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic WrongBPReturnD;
// Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong.
@ -148,30 +147,27 @@ module bpred (
btb #(`BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW,
.PCNextF, .PCF, .PCD, .PCE, .PCM,
.BTAF, .BTAD,
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
.PredictionInstrClassWrongM,
.IEUAdrE, .IEUAdrM, .IEUAdrW,
.InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), .InstrClassM({CallM, ReturnM, JumpM, BranchM}),
.IEUAdrE, .IEUAdrM,
.InstrClassD({CallD, ReturnD, JumpD, BranchD}),
.InstrClassE({CallE, ReturnE, JumpE, BranchE}),
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
.InstrClassW({CallW, ReturnW, JumpW, BranchW}));
icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, .BTBBranchF,
.BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD);
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
.BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD);
// Part 3 RAS
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
.BPReturnF, .ReturnD, .ReturnE, .CallE,
.WrongBPReturnD, .RASPCF, .PCLinkE);
assign BPPredPCF = BPReturnF ? RASPCF : BTAF;
// Check the prediction
// if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
// if the class prediction is wrong a regular instruction may have been predicted as a taken branch
@ -179,19 +175,15 @@ module bpred (
// The next instruction is always valid as no other flush would occur at the same time as the branch and not
// also flush the branch. This will change in a superscaler cpu.
assign PredictionPCWrongE = PCCorrectE != PCD;
// branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions.
assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD;
// *** clean up old signal names for testing.
logic BPPredWrongEAlt;
logic NotMatch;
assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD;
assign NotMatch = BPPredWrongE != BPPredWrongEAlt;
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// Output the predicted PC or corrected PC on miss-predict.
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF);
// Selects the BP or PC+2/4.
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F);
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PCNext0F);
// If the prediction is wrong select the correct address.
mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F);
// Correct branch/jump target.
@ -203,42 +195,39 @@ module bpred (
else assign NextValidPCE = PCE;
if(`ZICOUNTERS_SUPPORTED) begin
logic JumpOrTakenBranchE;
logic [`XLEN-1:0] BTAE, RASPCD, RASPCE;
logic BTBPredPCWrongE, RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
// 3. target ras (ras target wrong / class[2])
// 4. direction (br dir wrong / class[0])
logic JumpOrTakenBranchE;
logic [`XLEN-1:0] BTAE, RASPCD, RASPCE;
logic BTBPredPCWrongE, RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
// 3. target ras (ras target wrong / class[2])
// 4. direction (br dir wrong / class[0])
// Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction
// could be wrong or the fall through address selected for branch predict not taken.
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies.
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
// Unforuantely we can't use PCD to infer the correctness of the BTB or RAS because the class prediction
// could be wrong or the fall through address selected for branch predict not taken.
// By pipeline the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies.
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE;
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);
assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE;
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM});
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM});
end else begin
assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0;
assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0;
end
// **** Fix me
assign InstrClassM = {CallM, ReturnM, JumpM, BranchM};
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
endmodule

View File

@ -34,7 +34,7 @@ module btb #(parameter Depth = 10 ) (
input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BTAD,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
@ -42,20 +42,21 @@ module btb #(parameter Depth = 10 ) (
input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrW,
input logic [3:0] InstrClassD, // Instruction class to insert into btb
input logic [3:0] InstrClassE, // Instruction class to insert into btb
input logic [3:0] InstrClassM, // Instruction class to insert into btb
input logic [3:0] InstrClassW
);
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
logic [`XLEN-1:0] ResetPC;
logic MatchD, MatchE, MatchM, MatchW, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredF;
logic UpdateEn;
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
logic [`XLEN-1:0] ResetPC;
logic MatchD, MatchE, MatchM, MatchW, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredF;
logic UpdateEn;
logic [`XLEN-1:0] IEUAdrW;
logic [`XLEN-1:0] PCW;
// hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input.
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
@ -94,5 +95,8 @@ module btb #(parameter Depth = 10 ) (
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1));
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
endmodule

View File

@ -34,7 +34,7 @@ module icpred (
input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction
input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction
input logic BranchD, BranchE,
input logic JumpD, JumpE,
output logic BranchM, BranchW,