Modified btb forwarding logic to reduce critical path.

This commit is contained in:
Ross Thompson 2023-02-24 17:47:43 -06:00
parent 5243d1e1d4
commit 4ffaa75c2a
2 changed files with 32 additions and 24 deletions

View File

@ -77,7 +77,7 @@ module bpred (
logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE; logic AnyWrongPredInstrClassD, AnyWrongPredInstrClassE;
logic DirPredictionWrongE; logic DirPredictionWrongE;
logic SelBPPredF; logic BPPCSrcF;
logic [`XLEN-1:0] BPPredPCF; logic [`XLEN-1:0] BPPredPCF;
logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE; logic [`XLEN-1:0] PCCorrectE;
@ -96,6 +96,7 @@ module bpred (
logic BranchM, JumpM, RetM, JalM; logic BranchM, JumpM, RetM, JalM;
logic WrongBPRetD; logic WrongBPRetD;
logic [`XLEN-1:0] PCW;
// Part 1 branch direction prediction // Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong. // look into the 2 port Sram model. something is wrong.
@ -147,16 +148,18 @@ module bpred (
btb #(`BTB_SIZE) btb #(`BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .PCNextF, .PCF, .PCD, .PCE, .PCM, .PCW,
.BTAF, .BTAD, .BTAF, .BTAD,
.BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}), .BTBPredInstrClassF({BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}),
.PredictionInstrClassWrongM, .PredictionInstrClassWrongM,
.IEUAdrE, .IEUAdrM, .IEUAdrE, .IEUAdrM,
.InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM})); .InstrClassD({JalD, RetD, JumpD, BranchD}), .InstrClassE({JalE, RetE, JumpE, BranchE}), .InstrClassM({JalM, RetM, JumpM, BranchM}));
// the branch predictor needs a compact decoding of the instruction class. if (!`INSTR_CLASS_PRED) begin : DirectClassDecode
if (`INSTR_CLASS_PRED == 0) begin : DirectClassDecode // This section is mainly for testing, verification, and PPA comparison.
logic [3:0] InstrClassF; // An alternative to using the BTB to store the instruction class is to partially decode
// the instructions in the Fetch stage into, Jal, Ret, Jump, and Branch instructions.
// This logic is not described in the text book as of 23 February 2023.
logic cjal, cj, cjr, cjalr, CJumpF, CBranchF; logic cjal, cj, cjr, cjalr, CJumpF, CBranchF;
logic NCJumpF, NCBranchF; logic NCJumpF, NCBranchF;
@ -185,9 +188,10 @@ module bpred (
(`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01))); (`C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
end else begin end else begin
// This section connects the BTB's instruction class prediction.
assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF}; assign {BPJalF, BPRetF, BPJumpF, BPBranchF} = {BTBJalF, BTBRetF, BTBJumpF, BTBBranchF};
end end
assign SelBPPredF = (BPBranchF & DirPredictionF[1]) | BPJumpF; assign BPPCSrcF = (BPBranchF & DirPredictionF[1]) | BPJumpF;
// Part 3 RAS // Part 3 RAS
RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, RASPredictor RASPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
@ -196,11 +200,7 @@ module bpred (
assign BPPredPCF = BPRetF ? RASPCF : BTAF; assign BPPredPCF = BPRetF ? RASPCF : BTAF;
//assign InstrClassD[0] = BranchD;
//assign InstrClassD[1] = JumpD ;
//assign InstrClassD[2] = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5
assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5 assign RetD = JumpD & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or x5
//assign InstrClassD[3] = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 assign JalD = JumpD & (InstrD[11:7] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE}); flopenrc #(2) InstrClassRegE(clk, reset, FlushE, ~StallE, {JalD, RetD}, {JalE, RetE});
@ -227,7 +227,6 @@ module bpred (
assign WrongBPRetD = BPRetD ^ RetD; assign WrongBPRetD = BPRetD ^ RetD;
// branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions. // branch is wrong only if the PC does not match and both the Decode and Fetch stages have valid instructions.
//assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE | (AnyWrongPredInstrClassE & ~|InstrClassE));
assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD; assign BPPredWrongE = PredictionPCWrongE & InstrValidE & InstrValidD;
logic BPPredWrongEAlt; logic BPPredWrongEAlt;
@ -237,7 +236,7 @@ module bpred (
// Output the predicted PC or corrected PC on miss-predict. // Output the predicted PC or corrected PC on miss-predict.
// Selects the BP or PC+2/4. // Selects the BP or PC+2/4.
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, SelBPPredF, PCNext0F); mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPredPCF, BPPCSrcF, PCNext0F);
// If the prediction is wrong select the correct address. // If the prediction is wrong select the correct address.
mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F); mux2 #(`XLEN) pcmux1(PCNext0F, PCCorrectE, BPPredWrongE, PCNext1F);
// Correct branch/jump target. // Correct branch/jump target.
@ -283,5 +282,6 @@ module bpred (
// **** Fix me // **** Fix me
assign InstrClassM = {JalM, RetM, JumpM, BranchM}; assign InstrClassM = {JalM, RetM, JumpM, BranchM};
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule endmodule

View File

@ -34,7 +34,7 @@ module btb #(parameter Depth = 10 ) (
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, // PC at various stages input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW,// PC at various stages
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BTAD, output logic [`XLEN-1:0] BTAD,
output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class output logic [3:0] BTBPredInstrClassF, // BTB's guess at instruction class
@ -47,12 +47,14 @@ module btb #(parameter Depth = 10 ) (
input logic [3:0] InstrClassM // Instruction class to insert into btb input logic [3:0] InstrClassM // Instruction class to insert into btb
); );
logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex; logic [Depth-1:0] PCNextFIndex, PCFIndex, PCDIndex, PCEIndex, PCMIndex, PCWIndex;
logic [`XLEN-1:0] ResetPC; logic [`XLEN-1:0] ResetPC;
logic MatchF, MatchD, MatchE, MatchM, MatchNextX, MatchXF; logic MatchF, MatchD, MatchE, MatchM, MatchW, MatchNextX, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF;
logic UpdateEn; logic UpdateEn;
logic [3:0] InstrClassW;
logic [`XLEN-1:0] IEUAdrW;
// hashing function for indexing the PC // hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input. // We have Depth bits to index, but XLEN bits as the input.
@ -62,6 +64,7 @@ module btb #(parameter Depth = 10 ) (
assign PCDIndex = {PCD[Depth+1] ^ PCD[1], PCD[Depth:2]}; assign PCDIndex = {PCD[Depth+1] ^ PCD[1], PCD[Depth:2]};
assign PCEIndex = {PCE[Depth+1] ^ PCE[1], PCE[Depth:2]}; assign PCEIndex = {PCE[Depth+1] ^ PCE[1], PCE[Depth:2]};
assign PCMIndex = {PCM[Depth+1] ^ PCM[1], PCM[Depth:2]}; assign PCMIndex = {PCM[Depth+1] ^ PCM[1], PCM[Depth:2]};
assign PCWIndex = {PCW[Depth+1] ^ PCW[1], PCW[Depth:2]};
// must output a valid PC and valid bit during reset. Because only PCF, not PCNextF is reset, PCNextF is invalid // must output a valid PC and valid bit during reset. Because only PCF, not PCNextF is reset, PCNextF is invalid
// during reset. The BTB must produce a non X PC1NextF to allow the simulation to run. // during reset. The BTB must produce a non X PC1NextF to allow the simulation to run.
@ -71,21 +74,24 @@ module btb #(parameter Depth = 10 ) (
assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]}; assign PCNextFIndex = reset ? ResetPC[Depth+1:2] : {PCNextF[Depth+1] ^ PCNextF[1], PCNextF[Depth:2]};
assign MatchF = PCNextFIndex == PCFIndex; assign MatchF = PCNextFIndex == PCFIndex;
assign MatchD = PCNextFIndex == PCDIndex; assign MatchD = PCFIndex == PCDIndex;
assign MatchE = PCNextFIndex == PCEIndex; assign MatchE = PCFIndex == PCEIndex;
assign MatchM = PCNextFIndex == PCMIndex; assign MatchM = PCFIndex == PCMIndex;
assign MatchNextX = MatchF | MatchD | MatchE | MatchM; assign MatchW = PCFIndex == PCWIndex;
assign MatchX = MatchD | MatchE | MatchM | MatchW;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); // flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardBTBPrediction = MatchF ? {BTBPredInstrClassF, BTAF} : assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} :
MatchD ? {InstrClassD, BTAD} :
MatchE ? {InstrClassE, IEUAdrE} : MatchE ? {InstrClassE, IEUAdrE} :
{InstrClassM, IEUAdrM} ; MatchM ? {InstrClassM, IEUAdrM} :
{InstrClassW, IEUAdrW} ;
/* -----\/----- EXCLUDED -----\/-----
flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF);
-----/\----- EXCLUDED -----/\----- */
assign {BTBPredInstrClassF, BTAF} = MatchXF ? ForwardBTBPredictionF : {TableBTBPredictionF}; assign {BTBPredInstrClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredictionF};
assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM;
@ -97,4 +103,6 @@ module btb #(parameter Depth = 10 ) (
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
flopenrc #(`XLEN+4) IEUAdrWReg(clk, reset, FlushW, ~StallW, {InstrClassM, IEUAdrM}, {InstrClassW, IEUAdrW});
endmodule endmodule