diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 7512e2d3b..3ad0506c4 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -96,7 +96,8 @@ module bpred ( logic BranchM, JumpM, ReturnM, CallM; logic BranchW, JumpW, ReturnW, CallW; logic WrongBPReturnD; - + logic [`XLEN-1:0] BTAE; + // Part 1 branch direction prediction // look into the 2 port Sram model. something is wrong. if (`BPRED_TYPE == "BP_TWOBIT") begin:Predictor @@ -148,9 +149,9 @@ module bpred ( btb #(`BTB_SIZE) TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, .PCNextF, .PCF, .PCD, .PCE, .PCM, - .BTAF, .BTAD, + .BTAF, .BTAD, .BTAE, .BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}), - .PredictionInstrClassWrongM, + .PredictionInstrClassWrongM, .AnyWrongPredInstrClassE, .IEUAdrE, .IEUAdrM, .InstrClassD({CallD, ReturnD, JumpD, BranchD}), .InstrClassE({CallE, ReturnE, JumpE, BranchE}), @@ -195,7 +196,7 @@ module bpred ( if(`ZICOUNTERS_SUPPORTED) begin logic JumpOrTakenBranchE; - logic [`XLEN-1:0] BTAE, RASPCD, RASPCE; + logic [`XLEN-1:0] RASPCD, RASPCE; logic BTBPredPCWrongE, RASPredPCWrongE; // performance counters // 1. 
class (class wrong / minstret) (PredictionInstrClassWrongM / csr) // Correct now @@ -214,8 +215,6 @@ module bpred ( flopenrc #(1) JumpOrTakenBranchMReg(clk, reset, FlushM, ~StallM, JumpOrTakenBranchE, JumpOrTakenBranchM); - flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); - flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD); flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE); flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM, diff --git a/src/ifu/bpred/btb.sv b/src/ifu/bpred/btb.sv index b0d988943..1d6c0ff8d 100644 --- a/src/ifu/bpred/btb.sv +++ b/src/ifu/bpred/btb.sv @@ -36,10 +36,12 @@ module btb #(parameter Depth = 10 ) ( input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages output logic [`XLEN-1:0] BTAF, // BTB's guess at PC - output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAD, + output logic [`XLEN-1:0] BTAE, output logic [3:0] BTBIClassF, // BTB's guess at instruction class // update input logic PredictionInstrClassWrongM, // BTB's instruction class guess was wrong + input logic AnyWrongPredInstrClassE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb input logic [`XLEN-1:0] IEUAdrM, // Branch/jump target address to insert into btb input logic [3:0] InstrClassD, // Instruction class to insert into btb @@ -53,9 +55,11 @@ module btb #(parameter Depth = 10 ) ( logic MatchD, MatchE, MatchM, MatchW, MatchX; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] TableBTBPredF; - logic UpdateEn; logic [`XLEN-1:0] IEUAdrW; logic [`XLEN-1:0] PCW; + logic BTAWrongE, BTBWrongE; + logic BTBWrongM; + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. @@ -87,14 +91,22 @@ module btb #(parameter Depth = 10 ) ( assign {BTBIClassF, BTAF} = MatchX ? 
ForwardBTBPredictionF : {TableBTBPredF}; - assign UpdateEn = |InstrClassM | PredictionInstrClassWrongM; // An optimization may be using a PC relative address. ram2p1r1wbe #(2**Depth, `XLEN+4) memory( .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF), - .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(UpdateEn), .bwe2('1)); + .ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1)); flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD); + + // BTAE is not strictly necessary. However, it is used by two parts of wally. + // 1. It gates updates to the BTB when the prediction does not change. This saves power. + // 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong. + flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE); + assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]); + assign BTBWrongE = BTAWrongE | AnyWrongPredInstrClassE; + flopenrc #(1) BTBWrongMReg(clk, reset, FlushM, ~StallM, BTBWrongE, BTBWrongM); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);