Branch predictor cleanup.

This commit is contained in:
Ross Thompson 2023-02-26 21:28:36 -06:00
parent 3804626166
commit 447f6b1443
3 changed files with 72 additions and 79 deletions

View File

@ -70,33 +70,32 @@ module bpred (
output logic PredictionInstrClassWrongM // Class prediction is wrong output logic PredictionInstrClassWrongM // Class prediction is wrong
); );
logic [1:0] BPDirPredF; logic [1:0] BPDirPredF;
logic [`XLEN-1:0] BTAF, RASPCF; logic [`XLEN-1:0] BTAF, RASPCF;
logic PredictionPCWrongE; logic PredictionPCWrongE;
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic BPDirPredWrongE; logic BPDirPredWrongE;
logic BPPCSrcF; logic BPPCSrcF;
logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] BPPCF;
logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE; logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD; logic [3:0] WrongPredInstrClassD;
logic BTBTargetWrongE; logic BTBTargetWrongE;
logic RASTargetWrongE; logic RASTargetWrongE;
logic [`XLEN-1:0] BTAD; logic [`XLEN-1:0] BTAD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF; logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF; logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
logic BPBranchD, BPJumpD, BPReturnD, BPCallD; logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
logic ReturnD, CallD; logic ReturnD, CallD;
logic ReturnE, CallE; logic ReturnE, CallE;
logic BranchM, JumpM, ReturnM, CallM; logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW; logic BranchW, JumpW, ReturnW, CallW;
logic WrongBPReturnD; logic WrongBPReturnD;
logic [`XLEN-1:0] PCW, IEUAdrW;
// Part 1 branch direction prediction // Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong. // look into the 2 port Sram model. something is wrong.
@ -148,30 +147,27 @@ module bpred (
btb #(`BTB_SIZE) btb #(`BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW, .PCNextF, .PCF, .PCD, .PCE, .PCM,
.BTAF, .BTAD, .BTAF, .BTAD,
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
.PredictionInstrClassWrongM, .PredictionInstrClassWrongM,
.IEUAdrE, .IEUAdrM, .IEUAdrW, .IEUAdrE, .IEUAdrM,
.InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), .InstrClassM({CallM, ReturnM, JumpM, BranchM}), .InstrClassD({CallD, ReturnD, JumpD, BranchD}),
.InstrClassE({CallE, ReturnE, JumpE, BranchE}),
.InstrClassM({CallM, ReturnM, JumpM, BranchM}),
.InstrClassW({CallW, ReturnW, JumpW, BranchW})); .InstrClassW({CallW, ReturnW, JumpW, BranchW}));
icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, icpred icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW, .PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF, .BTBBranchF, .CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW, .BTBCallF, .BTBReturnF, .BTBJumpF,
.BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD); .BTBBranchF, .BPCallF, .BPReturnF, .BPJumpF, .BPBranchF, .PredictionInstrClassWrongM, .WrongBPReturnD);
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// Part 3 RAS // Part 3 RAS
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
.BPReturnF, .ReturnD, .ReturnE, .CallE, .BPReturnF, .ReturnD, .ReturnE, .CallE,
.WrongBPReturnD, .RASPCF, .PCLinkE); .WrongBPReturnD, .RASPCF, .PCLinkE);
assign BPPredPCF = BPReturnF ? RASPCF : BTAF;
// Check the prediction // Check the prediction
// if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address. // if it is a CFI then check if the next instruction address (PCD) matches the branch's target or fallthrough address.
// if the class prediction is wrong a regular instruction may have been predicted as a taken branch // if the class prediction is wrong a regular instruction may have been predicted as a taken branch
@ -179,19 +175,15 @@ module bpred (
// The next instruction is always valid as no other flush would occur at the same time as the branch and not // The next instruction is always valid as no other flush would occur at the same time as the branch and not
// also flush the branch. This will change in a superscalar CPU. // also flush the branch. This will change in a superscalar CPU.
assign PredictionPCWrongE = PCCorrectE != PCD; assign PredictionPCWrongE = PCCorrectE != PCD;
// branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions.
assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD;
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// *** clean up old signal names for testing.
logic BPPredWrongEAlt;
logic NotMatch;
assign BPPredWrongEAlt = PredictionPCWrongE & InstrValidE & InstrValidD;
assign NotMatch = BPPredWrongE != BPPredWrongEAlt;
// Output the predicted PC or corrected PC on mispredict. // Output the predicted PC or corrected PC on mispredict.
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF);
// Selects the BP or PC+2/4. // Selects the BP or PC+2/4.
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F); mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PCNext0F);
// If the prediction is wrong select the correct address. // If the prediction is wrong select the correct address.
mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F);
// Correct branch/jump target. // Correct branch/jump target.
@ -203,42 +195,39 @@ module bpred (
else assign NextValidPCE = PCE; else assign NextValidPCE = PCE;
if(`ZICOUNTERS_SUPPORTED) begin if(`ZICOUNTERS_SUPPORTED) begin
logic JumpOrTakenBranchE; logic JumpOrTakenBranchE;
logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; logic [`XLEN-1:0] BTAE, RASPCD, RASPCE;
logic BTBPredPCWrongE, RASPredPCWrongE; logic BTBPredPCWrongE, RASPredPCWrongE;
// performance counters // performance counters
// 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now // 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal) // 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
// 3. target ras (ras target wrong / class[2]) // 3. target ras (ras target wrong / class[2])
// 4. direction (br dir wrong / class[0]) // 4. direction (br dir wrong / class[0])
// Unfortunately we can't use PCD to infer the correctness of the BTB or RAS because the class prediction // Unfortunately we can't use PCD to infer the correctness of the BTB or RAS because the class prediction
// could be wrong or the fall through address selected for branch predict not taken. // could be wrong or the fall through address selected for branch predict not taken.
// By pipelining the BTB's PC and the RAS address through the pipeline we can measure the accuracy of // By pipelining the BTB's PC and the RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies. // both without the above inaccuracies.
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE; assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE; assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE; assign JumpOrTakenBranchE = (BranchE & PCSrcE) | JumpE;
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE}, {BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM}); {BPDirPredWrongM, BTBPredPCWrongM, RASPredPCWrongM});
end else begin end else begin
assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0; assign {BTBPredPCWrongM, RASPredPCWrongM, JumpOrTakenBranchM} = '0;
end end
// **** Fix me // **** Fix me
assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; assign InstrClassM = {CallM, ReturnM, JumpM, BranchM};
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
endmodule endmodule

View File

@ -34,7 +34,7 @@ module btb #(parameter Depth = 10 ) (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BTAD, output logic [`XLEN-1:0] BTAD,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class output logic [3:0] BTBIClassF, // BTB's guess at instruction class
@ -42,20 +42,21 @@ module btb #(parameter Depth = 10 ) (
input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrW,
input logic [3:0] InstrClassD, // Instruction class to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb
input logic [3:0] InstrClassE, // Instruction class to insert into btb input logic [3:0] InstrClassE, // Instruction class to insert into btb
input logic [3:0] InstrClassM, // Instruction class to insert into btb input logic [3:0] InstrClassM, // Instruction class to insert into btb
input logic [3:0] InstrClassW input logic [3:0] InstrClassW
); );
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex; logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
logic [`XLEN-1:0] ResetPC; logic [`XLEN-1:0] ResetPC;
logic MatchD, MatchE, MatchM, MatchW, MatchX; logic MatchD, MatchE, MatchM, MatchW, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredF; logic [`XLEN+3:0] TableBTBPredF;
logic UpdateEn; logic UpdateEn;
logic [`XLEN-1:0] IEUAdrW;
logic [`XLEN-1:0] PCW;
// hashing function for indexing the PC // hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input. // We have Depth bits to index, but XLEN bits as the input.
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
@ -94,5 +95,8 @@ module btb #(parameter Depth = 10 ) (
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1));
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
endmodule endmodule

View File

@ -34,7 +34,7 @@ module icpred (
input logic clk, reset, input logic clk, reset,
input logic StallF, StallD, StallE, StallM, StallW, input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW, input logic FlushD, FlushE, FlushM, FlushW,
input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction input logic [31:0] PostSpillInstrRawF, InstrD, // Instruction
input logic BranchD, BranchE, input logic BranchD, BranchE,
input logic JumpD, JumpE, input logic JumpD, JumpE,
output logic BranchM, BranchW, output logic BranchM, BranchW,