Modified the BTB to save power by not updating when the prediction is unchanged.

This commit is contained in:
Ross Thompson 2023-02-27 17:37:29 -06:00
parent 44361f0a34
commit 69e8358639
2 changed files with 21 additions and 10 deletions

View File

@ -96,7 +96,8 @@ module bpred (
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic WrongBPReturnD;
logic [`XLEN-1:0] BTAE;
// Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong.
if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor
@ -148,9 +149,9 @@ module bpred (
btb #(`BTB_SIZE)
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM,
.BTAF, .BTAD,
.BTAF, .BTAD, .BTAE,
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
.PredictionInstrClassWrongM,
.PredictionInstrClassWrongM, .AnyWrongPredInstrClassE,
.IEUAdrE, .IEUAdrM,
.InstrClassD({CallD, ReturnD, JumpD, BranchD}),
.InstrClassE({CallE, ReturnE, JumpE, BranchE}),
@ -195,7 +196,7 @@ module bpred (
if(`ZICOUNTERS_SUPPORTED) begin
logic JumpOrTakenBranchE;
logic [`XLEN-1:0] BTAE, RASPCD, RASPCE;
logic [`XLEN-1:0] RASPCD, RASPCE;
logic BTBPredPCWrongE, RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now
@ -214,8 +215,6 @@ module bpred (
flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM);
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,

View File

@ -36,10 +36,12 @@ module btb #(parameter Depth = 10 ) (
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
output logic [`XLEN-1:0] BTAD,
output logic [`XLEN-1:0] BTAD,
output logic [`XLEN-1:0] BTAE,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
// update
input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong
input logic AnyWrongPredInstrClassE,
input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb
input logic [3:0] InstrClassD, // Instruction class to insert into btb
@ -53,9 +55,11 @@ module btb #(parameter Depth = 10 ) (
logic MatchD, MatchE, MatchM, MatchW, MatchX;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredF;
logic UpdateEn;
logic [`XLEN-1:0] IEUAdrW;
logic [`XLEN-1:0] PCW;
logic BTAWrongE, BTBWrongE;
logic BTBWrongM;
// hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input.
@ -87,14 +91,22 @@ module btb #(parameter Depth = 10 ) (
assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF};
assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM;
// An optimization may be using a PC relative address.
ram2p1r1wbe #(2**Depth, `XLEN+4) memory(
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1));
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
// BTAE is not strickly necessary. However it is used by two parts of wally.
// 1. It gates updates to the BTB when the prediction does not change. This save power.
// 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong.
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
assign BTBWrongE = BTAWrongE | AnyWrongPredInstrClassE;
flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM);
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);