More branch predictor improvements.

This commit is contained in:
Ross Thompson 2023-01-25 16:03:02 -06:00
parent 3d285312f0
commit 56aa798d5c
4 changed files with 60 additions and 66 deletions

View File

@ -64,7 +64,7 @@ module bpred (
logic [1:0] DirPredictionF; logic [1:0] DirPredictionF;
logic [3:0] PredInstrClassF, PredInstrClassD, PredInstrClassE; logic [3:0] PredInstrClassF, PredInstrClassD, PredInstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic [`XLEN-1:0] PredPCF, RASPCF;
logic TargetWrongE; logic TargetWrongE;
logic FallThroughWrongE; logic FallThroughWrongE;
logic PredictionPCWrongE; logic PredictionPCWrongE;
@ -95,8 +95,8 @@ module bpred (
speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .PCSrcE); .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE);
end else if (`BPTYPE == "BPGSHARE") begin:Predictor end else if (`BPTYPE == "BPGSHARE") begin:Predictor
gshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM, gshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushD, .FlushE, .FlushM,
.PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCM, .DirPredictionF, .DirPredictionWrongE,
@ -135,15 +135,12 @@ module bpred (
// Part 2 Branch target address prediction // Part 2 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets // *** For now the BTB will house the direct and indirect targets
btb TargetPredictor(.clk(clk), btb TargetPredictor(.clk, .reset, .StallF, .StallD, .StallM, .FlushD, .FlushM,
.reset(reset), .PCNextF, .PCF, .PCD, .PCE,
.*, // Stalls and flushes .PredPCF,
.PCNextF,
.BTBPredPCF,
.PredInstrClassF, .PredInstrClassF,
.PredValidF, .PredValidF,
.PredictionInstrClassWrongE, .PredictionInstrClassWrongE,
.PCE,
.IEUAdrE, .IEUAdrE,
.InstrClassE); .InstrClassE);
@ -160,7 +157,7 @@ module bpred (
.incr(1'b0), .incr(1'b0),
.PCLinkE); .PCLinkE);
assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTBPredPCF; assign BPPredPCF = PredInstrClassF[2] ? RASPCF : PredPCF;
// the branch predictor needs a compact decoding of the instruction class. // the branch predictor needs a compact decoding of the instruction class.
assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5 assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5

View File

@ -35,16 +35,15 @@ module btb
) )
(input logic clk, (input logic clk,
input logic reset, input logic reset,
input logic StallF, StallD, StallE, StallM, FlushD, FlushM, input logic StallF, StallD, StallM, FlushD, FlushM,
input logic [`XLEN-1:0] PCNextF, PCF, PCD, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, // PC at various stages
output logic [`XLEN-1:0] BTBPredPCF, output logic [`XLEN-1:0] PredPCF, // BTB's guess at PC
output logic [3:0] PredInstrClassF, output logic [3:0] PredInstrClassF, // BTB's guess at instruction class
output logic PredValidF, output logic PredValidF, // BTB's guess is valid
// update // update
input logic PredictionInstrClassWrongE, input logic PredictionInstrClassWrongE, // BTB's instruction class guess was wrong
input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] IEUAdrE, // Branch/jump target address to insert into btb
input logic [`XLEN-1:0] IEUAdrE, input logic [3:0] InstrClassE // Instruction class to insert into btb
input logic [3:0] InstrClassE
); );
localparam TotalDepth = 2 ** Depth; localparam TotalDepth = 2 ** Depth;
@ -54,10 +53,10 @@ module btb
logic MatchF, MatchD, MatchE, MatchNextX, MatchXF; logic MatchF, MatchD, MatchE, MatchNextX, MatchXF;
logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF; logic [`XLEN+3:0] ForwardBTBPrediction, ForwardBTBPredictionF;
logic [`XLEN+3:0] TableBTBPredictionF; logic [`XLEN+3:0] TableBTBPredictionF;
logic [`XLEN-1:0] BTBPredPCD; logic [`XLEN-1:0] PredPCD;
logic [3:0] PredInstrClassD; // copy of reg outside module logic [3:0] PredInstrClassD; // *** copy of reg outside module
logic UpdateEn;
// hashing function for indexing the PC // hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input. // We have Depth bits to index, but XLEN bits as the input.
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
@ -80,28 +79,30 @@ module btb
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardBTBPrediction = MatchF ? {PredInstrClassF, BTBPredPCF} : assign ForwardBTBPrediction = MatchF ? {PredInstrClassF, PredPCF} :
MatchD ? {PredInstrClassD, BTBPredPCD} : MatchD ? {PredInstrClassD, PredPCD} :
{InstrClassE, IEUAdrE} ; {InstrClassE, IEUAdrE} ;
flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF); flopenr #(`XLEN+4) ForwardBTBPredicitonReg(clk, reset, ~StallF, ForwardBTBPrediction, ForwardBTBPredictionF);
assign {PredInstrClassF, BTBPredPCF} = MatchXF ? ForwardBTBPredictionF : TableBTBPredictionF; assign {PredInstrClassF, PredPCF} = MatchXF ? ForwardBTBPredictionF : TableBTBPredictionF;
always_ff @ (posedge clk) begin always_ff @ (posedge clk) begin
if (reset) begin if (reset) begin
ValidBits <= #1 {TotalDepth{1'b0}}; ValidBits <= #1 {TotalDepth{1'b0}};
end else if ((|InstrClassE | PredictionInstrClassWrongE) & ~StallM & ~FlushM) begin end else if ((UpdateEn) & ~StallM & ~FlushM) begin
ValidBits[PCEIndex] <= #1 |InstrClassE; ValidBits[PCEIndex] <= #1 |InstrClassE;
end end
PredValidF = ValidBits[PCNextFIndex]; PredValidF = ValidBits[PCNextFIndex];
end end
assign UpdateEn = |InstrClassE | PredictionInstrClassWrongE;
// An optimization may be using a PC relative address. // An optimization may be using a PC relative address.
ram2p1r1wbe #(2**Depth, `XLEN+4) memory( ram2p1r1wbe #(2**Depth, `XLEN+4) memory(
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF), .clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredictionF),
.ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE, IEUAdrE}), .we2(UpdateEN), .bwe2('1)); .ce2(~StallM & ~FlushM), .wa2(PCEIndex), .wd2({InstrClassE, IEUAdrE}), .we2(UpdateEn), .bwe2('1));
flopenrc #(`XLEN+4) BTBD(clk, reset, FlushD, ~StallD, {PredInstrClassF, BTBPredPCF}, {PredInstrClassD, BTBPredPCD}); flopenrc #(`XLEN+4) BTBD(clk, reset, FlushD, ~StallD, {PredInstrClassF, PredPCF}, {PredInstrClassD, PredPCD});
endmodule endmodule

View File

@ -40,52 +40,56 @@ module speculativeglobalhistory
output logic DirPredictionWrongE, output logic DirPredictionWrongE,
// update // update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE input logic PCSrcE
); );
logic MatchF, MatchD, MatchE, MatchM, MatchW; logic MatchF, MatchD, MatchE;
logic MatchNextX, MatchXF; logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE;
logic [k-1:0] GHRF; logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
logic [k-1:0] GHRNextF; logic [k-1:0] GHRNextF;
logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW; logic [k:-1] GHRNextD, OldGHRD;
logic PCSrcM, PCSrcW; logic [k:0] GHRNextE, GHRNextM, GHRNextW;
logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE;
logic [`XLEN-1:0] PCW; logic [`XLEN-1:0] PCW;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
assign IndexNextF = GHRNextF;
assign IndexF = GHRF;
assign IndexD = GHRD[k-1:0];
assign IndexE = GHRE[k-1:0];
ram2p1r1wbe #(2**k, 2) PHT(.clk(clk), ram2p1r1wbe #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallW & ~FlushW), .ce1(~StallF | reset), .ce2(~StallM & ~FlushM),
.ra1(GHRNextF), .ra1(IndexNextF),
.rd1(TableDirPredictionF), .rd1(TableDirPredictionF),
.wa2(GHRW[k-1:0]), .wa2(IndexE),
.wd2(NewDirPredictionW), .wd2(NewDirPredictionE),
.we2(BranchInstrW & ~StallW & ~FlushW), .we2(BranchInstrE & ~StallM & ~FlushM),
.bwe2(1'b1)); .bwe2(1'b1));
// if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage // if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage
// and then register for use in the Fetch stage. // and then register for use in the Fetch stage.
assign MatchF = BranchInstrF & ~FlushD & (GHRNextF == GHRF); assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF);
assign MatchD = BranchInstrD & ~FlushE & (GHRNextF == GHRD[k-1:0]); assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD);
assign MatchE = BranchInstrE & ~FlushM & (GHRNextF == GHRE[k-1:0]); assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE);
assign MatchM = BranchInstrM & ~FlushW & (GHRNextF == GHRM[k-1:0]); assign MatchNextX = MatchF | MatchD | MatchE;
assign MatchW = BranchInstrW & (GHRNextF == GHRW[k-1:0]);
assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF : assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF :
MatchD ? NewDirPredictionD : MatchD ? NewDirPredictionD :
MatchE ? NewDirPredictionE : NewDirPredictionE ;
MatchM ? NewDirPredictionM :
NewDirPredictionW;
flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF);
assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF;
@ -95,16 +99,11 @@ module speculativeglobalhistory
flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE); flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE);
// New prediction pipeline // New prediction pipeline
satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF)); assign NewDirPredictionF = {DirPredictionF[1], DirPredictionF[1]};
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW);
// PCSrc pipeline
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
flopenrc #(1) PCSrcWReg(clk, reset, FlushW, ~StallW, PCSrcM, PCSrcW);
// GHR pipeline // GHR pipeline
assign GHRNextF = FlushD ? GHRNextD[k:1] : assign GHRNextF = FlushD ? GHRNextD[k:1] :
BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} :
@ -112,8 +111,11 @@ module speculativeglobalhistory
flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF); flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF);
assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF}; assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]};
flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD); flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD);
assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-1:-1] : // shift left
OldGHRD[k:0];
assign GHRNextE = FlushE ? GHRNextM : GHRD; assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE); flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);

View File

@ -49,7 +49,7 @@ module speculativegshare
logic MatchNextX, MatchXF; logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM; logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE;
logic [k-1:0] GHRF; logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
@ -59,7 +59,6 @@ module speculativegshare
logic [k-1:0] IndexNextF, IndexF; logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE; logic [k-1:0] IndexD, IndexE;
logic PCSrcM, PCSrcW;
logic [`XLEN-1:0] PCW; logic [`XLEN-1:0] PCW;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF; logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
@ -104,12 +103,7 @@ module speculativegshare
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
// PCSrc pipeline
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
flopenrc #(1) PCSrcWReg(clk, reset, FlushW, ~StallW, PCSrcM, PCSrcW);
// GHR pipeline // GHR pipeline
assign GHRNextF = FlushD ? GHRNextD[k:1] : assign GHRNextF = FlushD ? GHRNextD[k:1] :
BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} : BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} :