Integrated the branch predictor into the hardware. Not yet working.

This commit is contained in:
Ross Thompson 2021-02-17 22:19:17 -06:00
parent ca546beaf8
commit bbe0db3ebe
8 changed files with 109 additions and 48 deletions

View File

@ -38,6 +38,11 @@ switch $argc {
vopt +acc work.testbench -o workopt vopt +acc work.testbench -o workopt
vsim workopt vsim workopt
# load the branch predictors with known data. The value of the data is not important for function, but
# is important for preventing pessimistic x propagation.
mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory
mem load -infile BTBPredictor.txt -format bin testbench/dut/hart/ifu/bpred/TargetPredictor/memory/memory
view wave view wave
-- display input and output signals as hexadecimal values -- display input and output signals as hexadecimal values

View File

@ -30,7 +30,7 @@ module hazard(
// input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
// input logic MemReadE, // input logic MemReadE,
// input logic RegWriteM, RegWriteW, // input logic RegWriteM, RegWriteW,
input logic PCSrcE, CSRWritePendingDEM, RetM, TrapM, input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, input logic LoadStallD,
input logic InstrStall, DataStall, input logic InstrStall, DataStall,
// Stall outputs // Stall outputs
@ -52,7 +52,7 @@ module hazard(
// A stage must stall if the next stage is stalled // A stage must stall if the next stage is stalled
// If any stages are stalled, the first stage that isn't stalled must flush. // If any stages are stalled, the first stage that isn't stalled must flush.
assign BranchFlushDE = PCSrcE | RetM | TrapM; assign BranchFlushDE = BPPredWrongE | RetM | TrapM;
assign StallDCause = LoadStallD; assign StallDCause = LoadStallD;
assign StallFCause = InstrStall | CSRWritePendingDEM; assign StallFCause = InstrStall | CSRWritePendingDEM;
@ -60,6 +60,7 @@ module hazard(
assign StallD = StallDCause; assign StallD = StallDCause;
assign StallF = StallD | StallFCause; assign StallF = StallD | StallFCause;
assign FlushF = BPPredWrongE;
assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM;
assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM;
assign FlushM = RetM | TrapM; assign FlushM = RetM | TrapM;

View File

@ -72,10 +72,10 @@ module BTBPredictor
.RA1(LookUpPCIndex), .RA1(LookUpPCIndex),
.RD1(TargetPC), .RD1(TargetPC),
.REN1(1'b1), .REN1(1'b1),
.WA1(UpdatePCindex), .WA1(UpdatePCIndex),
.WD1(UpdateTarget), .WD1(UpdateTarget),
.WEN1(UpdateEN), .WEN1(UpdateEN),
.BitWEN1({XLEN{1'b1}})); .BitWEN1({`XLEN{1'b1}}));
endmodule endmodule

View File

@ -6,7 +6,15 @@
// Created: February 14, 2021 // Created: February 14, 2021
// Modified: // Modified:
// //
// Purpose: Hacky two port SRAM model. // Purpose: Behavioral model of two port SRAM. While this is synthesizable it will produce a flip flop based memory which
// behaves with the timing of an SRAM typical of GF 14nm, 32nm, and 45nm.
//
//
// to preload this memory we can use the following command
// in modelsim's do file.
// mem load -infile <relative path to the text file > -format <bin|hex> <hierarchy to the memory.>
// example
// mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory
// //
// A component of the Wally configurable RISC-V project. // A component of the Wally configurable RISC-V project.
// //
@ -31,6 +39,7 @@ module SRAM2P1R1W
#(parameter int Depth = 10, #(parameter int Depth = 10,
parameter int Width = 2 parameter int Width = 2
) )
(input clk, (input clk,
// port 1 is read only // port 1 is read only
@ -46,14 +55,11 @@ module SRAM2P1R1W
); );
logic [Depth-1:0] RA1Q, WA1Q; logic [Depth-1:0] RA1Q, WA1Q;
logic WEN1Q; logic WEN1Q;
logic [Width-1:0] WD1Q; logic [Width-1:0] WD1Q;
logic [2**Depth-1:0] [Width-1:0] memory; logic [Width-1:0] memory [2**Depth-1:0];
// SRAMs address busses are always registered first. // SRAMs address busses are always registered first.
@ -92,7 +98,7 @@ module SRAM2P1R1W
for (index = 0; index < Width; index = index + 1) begin for (index = 0; index < Width; index = index + 1) begin
always_ff @ (posedge clk) begin always_ff @ (posedge clk) begin
if (WEN1Q & BitWEN1[index]) begin if (WEN1Q & BitWEN1[index]) begin
memory[WA1Q][index] = WD1Q[index]; memory[WA1Q][index] <= WD1Q[index];
end end
end end
end end

View File

@ -58,7 +58,7 @@ module bpred
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE; logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic [3:0] InstrClassD, InstrClassF, InstrClassE; logic [3:0] InstrClassD, InstrClassF, InstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic [`XLEN-1:0] BTBPredPCF, RASPCF, BTBPredPCMemoryF;
logic TargetWrongE; logic TargetWrongE;
logic FallThroughWrongE; logic FallThroughWrongE;
logic PredictionDirWrongE; logic PredictionDirWrongE;
@ -71,13 +71,13 @@ module bpred
// This is probably too much logic. // This is probably too much logic.
// *** This also encourages me to switch to predicting the class. // *** This also encourages me to switch to predicting the class.
assign InstrClassF[2] = InstrF[5:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return
assign InstrClassF[1] = InstrF[5:0] == 7'h6F; // jump assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump
assign InstrClassF[0] = InstrF[5:0] == 7'h63; // branch assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch
// Part 2 branch direction prediction // Part 2 branch direction prediction
twoBitPredictor predictor(.clk(clk), twoBitPredictor DirPredictor(.clk(clk),
.LookUpPC(PCNextF), .LookUpPC(PCNextF),
.Prediction(BPPredF), .Prediction(BPPredF),
// update // update
@ -99,7 +99,7 @@ module bpred
// Part 3 Branch target address prediction // Part 3 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets // *** For now the BTB will house the direct and indirect targets
BTBPredictor targetPredictor(.clk(clk), BTBPredictor TargetPredictor(.clk(clk),
.reset(reset), .reset(reset),
.LookUpPC(PCNextF), .LookUpPC(PCNextF),
.TargetPC(BTBPredPCMemoryF), .TargetPC(BTBPredPCMemoryF),
@ -111,7 +111,7 @@ module bpred
// need to forward when updating to the same address as reading. // need to forward when updating to the same address as reading.
assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE; assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
assign TargetPC = (UpdatePC == LookUpPC) ? CorrectPCE : BTBPredPCMemoryF; assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCMemoryF;
// Part 4 RAS // Part 4 RAS
// *** need to add the logic to restore RAS on flushes. We will use incr for this. // *** need to add the logic to restore RAS on flushes. We will use incr for this.
@ -152,7 +152,7 @@ module bpred
.d(InstrClassF), .d(InstrClassF),
.q(InstrClassD)); .q(InstrClassD));
flopenr #(4) InstrClassRegE(.clk(clk), flopenrc #(4) InstrClassRegE(.clk(clk),
.reset(reset), .reset(reset),
.en(~StallD), .en(~StallD),
.clear(flushD), .clear(flushD),

View File

@ -28,7 +28,7 @@
module ifu ( module ifu (
input logic clk, reset, input logic clk, reset,
input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW, input logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch // Fetch
input logic [31:0] InstrF, input logic [31:0] InstrF,
output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] PCF,
@ -39,6 +39,7 @@ module ifu (
input logic PCSrcE, input logic PCSrcE,
input logic [`XLEN-1:0] PCTargetE, input logic [`XLEN-1:0] PCTargetE,
output logic [`XLEN-1:0] PCE, output logic [`XLEN-1:0] PCE,
output logic BPPredWrongE,
// Mem // Mem
input logic RetM, TrapM, input logic RetM, TrapM,
input logic [`XLEN-1:0] PrivilegedNextPCM, input logic [`XLEN-1:0] PrivilegedNextPCM,
@ -62,6 +63,11 @@ module ifu (
logic [31:0] InstrRawD, InstrE; logic [31:0] InstrRawD, InstrE;
logic [31:0] nop = 32'h00000013; // instruction for NOP logic [31:0] nop = 32'h00000013; // instruction for NOP
// branch predictor signals
logic SelBPPredF;
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
// *** put memory interface on here, InstrF becomes output // *** put memory interface on here, InstrF becomes output
assign InstrStall = 0; // *** assign InstrStall = 0; // ***
assign InstrPAdrF = PCF; // *** no MMU assign InstrPAdrF = PCF; // *** no MMU
@ -70,10 +76,49 @@ module ifu (
assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM);
mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); //mux3 #(`XLEN) pcmux(PCPlus2or4F, PCCorrectE, PrivilegedNextPCM, {PrivilegedChangePCM, BPPredWrongE}, UnalignedPCNextF);
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F),
.d1(BPPredPCF),
.s(SelBPPredF),
.y(PCNext0F));
mux2 #(`XLEN) pcmux1(.d0(PCNext0F),
.d1(PCCorrectE),
.s(BPPredWrongE),
.y(PCNext1F));
mux2 #(`XLEN) pcmux2(.d0(PCNext1F),
.d1(PrivilegedNextPCM),
.s(PrivilegedChangePCM),
.y(UnalignedPCNextF));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF);
// branch and jump predictor
// I am making the port connection explicit for now as I want to see them and they will be changing.
bpred bpred(.clk(clk),
.reset(reset),
.StallF(StallF),
.StallD(StallD),
.StallE(1'b0), // *** may need this eventually
.FlushF(FlushF),
.FlushD(FlushD),
.FlushE(FlushE),
.PCNextF(PCNextF),
.BPPredPCF(BPPredPCF),
.SelBPPredF(SelBPPredF),
.InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here.
// Also I believe this port will be removed.
.PCE(PCE),
.PCSrcE(PCSrcE),
.PCTargetE(PCTargetE),
.PCD(PCD),
.PCLinkE(PCLinkE),
.BPPredWrongE(BPPredWrongE));
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
// pcadder // pcadder
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction? assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction?

View File

@ -40,6 +40,8 @@ module twoBitPredictor
); );
logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex; logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex;
logic [1:0] PredictionMemory;
// hashing function for indexing the PC // hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input. // We have Depth bits to index, but XLEN bits as the input.
@ -50,10 +52,10 @@ module twoBitPredictor
SRAM2P1R1W #(Depth, 2) memory(.clk(clk), SRAM2P1R1W #(Depth, 2) memory(.clk(clk),
.RA1(LookUpPC), .RA1(LookUpPCIndex),
.RD1(PredictionMemory), .RD1(PredictionMemory),
.REN1(1'b1), .REN1(1'b1),
.WA1(UpdatePC), .WA1(UpdatePCIndex),
.WD1(UpdatePrediction), .WD1(UpdatePrediction),
.WEN1(UpdateEN), .WEN1(UpdateEN),
.BitWEN1(2'b11)); .BitWEN1(2'b11));

View File

@ -49,7 +49,7 @@ module wallypipelinedhart (
); );
logic [1:0] ForwardAE, ForwardBE; logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, FlushD, FlushE, FlushM, FlushW; logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM; logic RetM, TrapM;
// new signals that must connect through DP // new signals that must connect through DP
@ -86,6 +86,8 @@ module wallypipelinedhart (
logic [`XLEN-1:0] InstrPAdrF; logic [`XLEN-1:0] InstrPAdrF;
logic DataStall, InstrStall; logic DataStall, InstrStall;
logic InstrAckD, MemAckW; logic InstrAckD, MemAckW;
logic BPPredWrongE;
ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache