Converted to using the BTB to predict the instruction class.

This commit is contained in:
Ross Thompson 2021-03-04 09:23:35 -06:00
parent 7592a0dacb
commit 52d95d415f
3 changed files with 50 additions and 39 deletions

View File

@ -35,11 +35,13 @@ module BTBPredictor
input logic reset,
input logic [`XLEN-1:0] LookUpPC,
output logic [`XLEN-1:0] TargetPC,
output logic [3:0] InstrClass,
output logic Valid,
// update
input logic UpdateEN,
input logic [`XLEN-1:0] UpdatePC,
input logic [`XLEN-1:0] UpdateTarget
input logic [`XLEN-1:0] UpdateTarget,
input logic [3:0] UpdateInstrClass
);
localparam TotalDepth = 2 ** Depth;
@ -82,15 +84,15 @@ module BTBPredictor
// and other indirect branch data.
// Another optimization may be using a PC relative address.
SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
.reset(reset),
.RA1(LookUpPCIndex),
.RD1(TargetPC),
.REN1(1'b1),
.WA1(UpdatePCIndex),
.WD1(UpdateTarget),
.WEN1(UpdateEN),
.BitWEN1({`XLEN{1'b1}}));
// BTB storage. Each entry holds {instruction class, target PC}, i.e. `XLEN+4 bits.
// The read port is always enabled and is indexed by LookUpPCIndex; the write port
// is indexed by UpdatePCIndex and is gated by UpdateEN.
// The bit-write-enable mask has to span the whole `XLEN+4 entry: a mask that is only
// `XLEN bits wide is zero-padded at the port, which would leave the 4 instruction
// class bits permanently unwritten.
// NOTE(review): assumes BitWEN1 is as wide as the data word and active high --
// confirm against the SRAM2P1R1W definition.
SRAM2P1R1W #(Depth, `XLEN+4) memory(.clk(clk),
    .reset(reset),
    .RA1(LookUpPCIndex),
    .RD1({InstrClass, TargetPC}),
    .REN1(1'b1),
    .WA1(UpdatePCIndex),
    .WD1({UpdateInstrClass, UpdateTarget}),
    .WEN1(UpdateEN),
    .BitWEN1({(`XLEN+4){1'b1}}));
endmodule

View File

@ -36,9 +36,6 @@ module bpred
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
output logic [`XLEN-1:0] BPPredPCF,
output logic SelBPPredF,
input logic [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
// if this is too slow we will have to predict the type of instruction.
// Execute state
// Update Predictor
input logic [`XLEN-1:0] PCE, // The address of the currently executing instruction
// 1 hot encoding
@ -50,6 +47,7 @@ module bpred
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [3:0] InstrClassE,
// Report branch prediction status
output logic BPPredWrongE
);
@ -57,7 +55,7 @@ module bpred
logic BTBValidF;
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
logic [3:0] BPInstrClassF, BPInstrClassD, BPInstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE;
logic FallThroughWrongE;
@ -65,17 +63,8 @@ module bpred
logic PredictionPCWrongE;
logic [`XLEN-1:0] CorrectPCE;
// Part 1 decode the instruction class.
// *** for now I'm skipping the compressed instructions
assign InstrClassF[3] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // return
// This is probably too much logic.
// *** This also encourages me to switch to predicting the class.
assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] != 5'h01; // jump register, but not return
assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump
assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch
// Part 2 branch direction prediction
// Part 1 branch direction prediction
twoBitPredictor DirPredictor(.clk(clk),
.reset(reset),
@ -91,40 +80,42 @@ module bpred
// 2) Any information which is necessary for the predictor to build its next state.
// For a 2 bit table this is the prediction count.
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) |
InstrClassF[3] |
(InstrClassF[2] & BTBValidF) |
InstrClassF[1] & BTBValidF) ;
assign SelBPPredF = ((BPInstrClassF[0] & BPPredF[1] & BTBValidF) |
BPInstrClassF[3] |
(BPInstrClassF[2] & BTBValidF) |
BPInstrClassF[1] & BTBValidF) ;
// Part 3 Branch target address prediction
// Part 2 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets
BTBPredictor TargetPredictor(.clk(clk),
.reset(reset),
.LookUpPC(PCNextF),
.TargetPC(BTBPredPCF),
.InstrClass(BPInstrClassF),
.Valid(BTBValidF),
// update
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
.UpdatePC(PCE),
.UpdateTarget(PCTargetE));
.UpdateTarget(PCTargetE),
.UpdateInstrClass(InstrClassE));
// need to forward when updating to the same address as reading.
//assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
//assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF;
// Part 4 RAS
// Part 3 RAS
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
RASPredictor RASPredictor(.clk(clk),
.reset(reset),
.pop(InstrClassF[3]),
.pop(BPInstrClassF[3]),
.popPC(RASPCF),
.push(InstrClassE[3]),
.incr(1'b0),
.pushPC(PCLinkE));
assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF;
@ -150,15 +141,17 @@ module bpred
.reset(reset),
.en(~StallF),
.clear(FlushF),
.d(InstrClassF),
.q(InstrClassD));
.d(BPInstrClassF),
.q(BPInstrClassD));
flopenrc #(4) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(InstrClassD),
.q(InstrClassE));
.d(BPInstrClassD),
.q(BPInstrClassE));
// Check the prediction makes execution.
assign TargetWrongE = PCTargetE != PCD;

View File

@ -67,6 +67,8 @@ module ifu (
// branch predictor signals
logic SelBPPredF;
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
logic [3:0] InstrClassD, InstrClassE;
// *** put memory interface on here, InstrF becomes output
@ -109,13 +111,12 @@ module ifu (
.PCNextF(PCNextF),
.BPPredPCF(BPPredPCF),
.SelBPPredF(SelBPPredF),
.InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here.
// Also I believe this port will be removed.
.PCE(PCE),
.PCSrcE(PCSrcE),
.PCTargetE(PCTargetE),
.PCD(PCD),
.PCLinkE(PCLinkE),
.InstrClassE(InstrClassE),
.BPPredWrongE(BPPredWrongE));
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
@ -142,6 +143,14 @@ module ifu (
assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
// *** combine these with others in better way, including M, F
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[3] = InstrD[6:0] == 7'h67 && InstrD[19:15] == 5'h01; // return
assign InstrClassD[2] = InstrD[6:0] == 7'h67 && InstrD[19:15] != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F; // jump
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
// Misaligned PC logic
generate
@ -164,6 +173,13 @@ module ifu (
flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM);
flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later
flopenrc #(4) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(InstrClassD),
.q(InstrClassE));
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
// have dedicated adder in Mem stage based on PCM + 2 or 4