mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Converted to using the BTB to predict the instruction class.
This commit is contained in:
parent
7592a0dacb
commit
52d95d415f
@ -35,11 +35,13 @@ module BTBPredictor
|
|||||||
input logic reset,
|
input logic reset,
|
||||||
input logic [`XLEN-1:0] LookUpPC,
|
input logic [`XLEN-1:0] LookUpPC,
|
||||||
output logic [`XLEN-1:0] TargetPC,
|
output logic [`XLEN-1:0] TargetPC,
|
||||||
|
output logic [3:0] InstrClass,
|
||||||
output logic Valid,
|
output logic Valid,
|
||||||
// update
|
// update
|
||||||
input logic UpdateEN,
|
input logic UpdateEN,
|
||||||
input logic [`XLEN-1:0] UpdatePC,
|
input logic [`XLEN-1:0] UpdatePC,
|
||||||
input logic [`XLEN-1:0] UpdateTarget
|
input logic [`XLEN-1:0] UpdateTarget,
|
||||||
|
input logic [3:0] UpdateInstrClass
|
||||||
);
|
);
|
||||||
|
|
||||||
localparam TotalDepth = 2 ** Depth;
|
localparam TotalDepth = 2 ** Depth;
|
||||||
@ -82,15 +84,15 @@ module BTBPredictor
|
|||||||
// and other indirection branch data.
|
// and other indirection branch data.
|
||||||
// Another optimization may be using a PC relative address.
|
// Another optimization may be using a PC relative address.
|
||||||
|
|
||||||
SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
|
SRAM2P1R1W #(Depth, `XLEN+4) memory(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.RA1(LookUpPCIndex),
|
.RA1(LookUpPCIndex),
|
||||||
.RD1(TargetPC),
|
.RD1({{InstrClass, TargetPC}}),
|
||||||
.REN1(1'b1),
|
.REN1(1'b1),
|
||||||
.WA1(UpdatePCIndex),
|
.WA1(UpdatePCIndex),
|
||||||
.WD1(UpdateTarget),
|
.WD1({UpdateInstrClass, UpdateTarget}),
|
||||||
.WEN1(UpdateEN),
|
.WEN1(UpdateEN),
|
||||||
.BitWEN1({`XLEN{1'b1}}));
|
.BitWEN1({`XLEN{1'b1}}));
|
||||||
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -36,9 +36,6 @@ module bpred
|
|||||||
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
|
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
|
||||||
output logic [`XLEN-1:0] BPPredPCF,
|
output logic [`XLEN-1:0] BPPredPCF,
|
||||||
output logic SelBPPredF,
|
output logic SelBPPredF,
|
||||||
input logic [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
|
|
||||||
// if this is too slow we will have to predict the type of instruction.
|
|
||||||
// Execute state
|
|
||||||
// Update Predictor
|
// Update Predictor
|
||||||
input logic [`XLEN-1:0] PCE, // The address of the currently executing instruction
|
input logic [`XLEN-1:0] PCE, // The address of the currently executing instruction
|
||||||
// 1 hot encoding
|
// 1 hot encoding
|
||||||
@ -50,6 +47,7 @@ module bpred
|
|||||||
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
|
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
|
||||||
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
|
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
|
||||||
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||||
|
input logic [3:0] InstrClassE,
|
||||||
// Report branch prediction status
|
// Report branch prediction status
|
||||||
output logic BPPredWrongE
|
output logic BPPredWrongE
|
||||||
);
|
);
|
||||||
@ -57,7 +55,7 @@ module bpred
|
|||||||
logic BTBValidF;
|
logic BTBValidF;
|
||||||
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
|
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
|
||||||
|
|
||||||
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
|
logic [3:0] BPInstrClassF, BPInstrClassD, BPInstrClassE;
|
||||||
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
||||||
logic TargetWrongE;
|
logic TargetWrongE;
|
||||||
logic FallThroughWrongE;
|
logic FallThroughWrongE;
|
||||||
@ -65,17 +63,8 @@ module bpred
|
|||||||
logic PredictionPCWrongE;
|
logic PredictionPCWrongE;
|
||||||
logic [`XLEN-1:0] CorrectPCE;
|
logic [`XLEN-1:0] CorrectPCE;
|
||||||
|
|
||||||
// Part 1 decode the instruction class.
|
|
||||||
// *** for now I'm skipping the compressed instructions
|
|
||||||
assign InstrClassF[3] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // return
|
|
||||||
// This is probably too much logic.
|
|
||||||
// *** This also encourages me to switch to predicting the class.
|
|
||||||
|
|
||||||
assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] != 5'h01; // jump register, but not return
|
// Part 1 branch direction prediction
|
||||||
assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump
|
|
||||||
assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch
|
|
||||||
|
|
||||||
// Part 2 branch direction prediction
|
|
||||||
|
|
||||||
twoBitPredictor DirPredictor(.clk(clk),
|
twoBitPredictor DirPredictor(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
@ -91,40 +80,42 @@ module bpred
|
|||||||
// 2) Any information which is necessary for the predictor to build its next state.
|
// 2) Any information which is necessary for the predictor to build its next state.
|
||||||
// For a 2 bit table this is the prediction count.
|
// For a 2 bit table this is the prediction count.
|
||||||
|
|
||||||
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) |
|
assign SelBPPredF = ((BPInstrClassF[0] & BPPredF[1] & BTBValidF) |
|
||||||
InstrClassF[3] |
|
BPInstrClassF[3] |
|
||||||
(InstrClassF[2] & BTBValidF) |
|
(BPInstrClassF[2] & BTBValidF) |
|
||||||
InstrClassF[1] & BTBValidF) ;
|
BPInstrClassF[1] & BTBValidF) ;
|
||||||
|
|
||||||
|
|
||||||
// Part 3 Branch target address prediction
|
// Part 2 Branch target address prediction
|
||||||
// *** For now the BTB will house the direct and indirect targets
|
// *** For now the BTB will house the direct and indirect targets
|
||||||
|
|
||||||
BTBPredictor TargetPredictor(.clk(clk),
|
BTBPredictor TargetPredictor(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.LookUpPC(PCNextF),
|
.LookUpPC(PCNextF),
|
||||||
.TargetPC(BTBPredPCF),
|
.TargetPC(BTBPredPCF),
|
||||||
|
.InstrClass(BPInstrClassF),
|
||||||
.Valid(BTBValidF),
|
.Valid(BTBValidF),
|
||||||
// update
|
// update
|
||||||
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
|
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
|
||||||
.UpdatePC(PCE),
|
.UpdatePC(PCE),
|
||||||
.UpdateTarget(PCTargetE));
|
.UpdateTarget(PCTargetE),
|
||||||
|
.UpdateInstrClass(InstrClassE));
|
||||||
|
|
||||||
// need to forward when updating to the same address as reading.
|
// need to forward when updating to the same address as reading.
|
||||||
//assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
|
//assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
|
||||||
//assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF;
|
//assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCF;
|
||||||
|
|
||||||
// Part 4 RAS
|
// Part 3 RAS
|
||||||
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
|
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
|
||||||
RASPredictor RASPredictor(.clk(clk),
|
RASPredictor RASPredictor(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.pop(InstrClassF[3]),
|
.pop(BPInstrClassF[3]),
|
||||||
.popPC(RASPCF),
|
.popPC(RASPCF),
|
||||||
.push(InstrClassE[3]),
|
.push(InstrClassE[3]),
|
||||||
.incr(1'b0),
|
.incr(1'b0),
|
||||||
.pushPC(PCLinkE));
|
.pushPC(PCLinkE));
|
||||||
|
|
||||||
assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
|
assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -150,15 +141,17 @@ module bpred
|
|||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallF),
|
.en(~StallF),
|
||||||
.clear(FlushF),
|
.clear(FlushF),
|
||||||
.d(InstrClassF),
|
.d(BPInstrClassF),
|
||||||
.q(InstrClassD));
|
.q(BPInstrClassD));
|
||||||
|
|
||||||
flopenrc #(4) InstrClassRegE(.clk(clk),
|
flopenrc #(4) InstrClassRegE(.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.en(~StallD),
|
.en(~StallD),
|
||||||
.clear(FlushD),
|
.clear(FlushD),
|
||||||
.d(InstrClassD),
|
.d(BPInstrClassD),
|
||||||
.q(InstrClassE));
|
.q(BPInstrClassE));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Check whether the prediction matches what was resolved in execution.
|
// Check whether the prediction matches what was resolved in execution.
|
||||||
assign TargetWrongE = PCTargetE != PCD;
|
assign TargetWrongE = PCTargetE != PCD;
|
||||||
|
@ -67,6 +67,8 @@ module ifu (
|
|||||||
// branch predictor signals
|
// branch predictor signals
|
||||||
logic SelBPPredF;
|
logic SelBPPredF;
|
||||||
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
|
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
|
||||||
|
logic [3:0] InstrClassD, InstrClassE;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// *** put memory interface on here, InstrF becomes output
|
// *** put memory interface on here, InstrF becomes output
|
||||||
@ -109,13 +111,12 @@ module ifu (
|
|||||||
.PCNextF(PCNextF),
|
.PCNextF(PCNextF),
|
||||||
.BPPredPCF(BPPredPCF),
|
.BPPredPCF(BPPredPCF),
|
||||||
.SelBPPredF(SelBPPredF),
|
.SelBPPredF(SelBPPredF),
|
||||||
.InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here.
|
|
||||||
// Also I believe this port will be removed.
|
|
||||||
.PCE(PCE),
|
.PCE(PCE),
|
||||||
.PCSrcE(PCSrcE),
|
.PCSrcE(PCSrcE),
|
||||||
.PCTargetE(PCTargetE),
|
.PCTargetE(PCTargetE),
|
||||||
.PCD(PCD),
|
.PCD(PCD),
|
||||||
.PCLinkE(PCLinkE),
|
.PCLinkE(PCLinkE),
|
||||||
|
.InstrClassE(InstrClassE),
|
||||||
.BPPredWrongE(BPPredWrongE));
|
.BPPredWrongE(BPPredWrongE));
|
||||||
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
|
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
|
||||||
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
|
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
|
||||||
@ -142,6 +143,14 @@ module ifu (
|
|||||||
assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
|
assign IllegalIEUInstrFaultD = IllegalBaseInstrFaultD | IllegalCompInstrD; // illegal if bad 32 or 16-bit instr
|
||||||
// *** combine these with others in better way, including M, F
|
// *** combine these with others in better way, including M, F
|
||||||
|
|
||||||
|
|
||||||
|
// the branch predictor needs a compact decoding of the instruction class.
|
||||||
|
// *** consider adding in the alternate return address x5 for returns.
|
||||||
|
assign InstrClassD[3] = InstrD[6:0] == 7'h67 && InstrD[19:15] == 5'h01; // return
|
||||||
|
assign InstrClassD[2] = InstrD[6:0] == 7'h67 && InstrD[19:15] != 5'h01; // jump register, but not return
|
||||||
|
assign InstrClassD[1] = InstrD[6:0] == 7'h6F; // jump
|
||||||
|
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
|
||||||
|
|
||||||
// Misaligned PC logic
|
// Misaligned PC logic
|
||||||
|
|
||||||
generate
|
generate
|
||||||
@ -164,6 +173,13 @@ module ifu (
|
|||||||
flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM);
|
flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM);
|
||||||
flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later
|
flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later
|
||||||
|
|
||||||
|
flopenrc #(4) InstrClassRegE(.clk(clk),
|
||||||
|
.reset(reset),
|
||||||
|
.en(~StallD),
|
||||||
|
.clear(FlushD),
|
||||||
|
.d(InstrClassD),
|
||||||
|
.q(InstrClassE));
|
||||||
|
|
||||||
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
// seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL.
|
||||||
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
// either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or
|
||||||
// have dedicated adder in Mem stage based on PCM + 2 or 4
|
// have dedicated adder in Mem stage based on PCM + 2 or 4
|
||||||
|
Loading…
Reference in New Issue
Block a user