Integrated the branch predictor into the hardware. Not yet working.

This commit is contained in:
Ross Thompson 2021-02-17 22:19:17 -06:00
parent 78db3654c6
commit 5df7e959f3
8 changed files with 109 additions and 48 deletions

View File

@ -38,6 +38,11 @@ switch $argc {
vopt +acc work.testbench -o workopt
vsim workopt
# load the branch predictors with known data. The value of the data is not important for function, but
# is important for preventing pessimistic x propagation.
mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory
mem load -infile BTBPredictor.txt -format bin testbench/dut/hart/ifu/bpred/TargetPredictor/memory/memory
view wave
-- display input and output signals as hexadecimal values

View File

@ -30,7 +30,7 @@ module hazard(
// input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
// input logic MemReadE,
// input logic RegWriteM, RegWriteW,
input logic PCSrcE, CSRWritePendingDEM, RetM, TrapM,
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD,
input logic InstrStall, DataStall,
// Stall outputs
@ -52,7 +52,7 @@ module hazard(
// A stage must stall if the next stage is stalled
// If any stages are stalled, the first stage that isn't stalled must flush.
assign BranchFlushDE = PCSrcE | RetM | TrapM;
assign BranchFlushDE = BPPredWrongE | RetM | TrapM;
assign StallDCause = LoadStallD;
assign StallFCause = InstrStall | CSRWritePendingDEM;
@ -60,6 +60,7 @@ module hazard(
assign StallD = StallDCause;
assign StallF = StallD | StallFCause;
assign FlushF = BPPredWrongE;
assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM;
assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM;
assign FlushM = RetM | TrapM;

View File

@ -72,10 +72,10 @@ module BTBPredictor
.RA1(LookUpPCIndex),
.RD1(TargetPC),
.REN1(1'b1),
.WA1(UpdatePCindex),
.WA1(UpdatePCIndex),
.WD1(UpdateTarget),
.WEN1(UpdateEN),
.BitWEN1({XLEN{1'b1}}));
.BitWEN1({`XLEN{1'b1}}));
endmodule

View File

@ -6,8 +6,16 @@
// Created: February 14, 2021
// Modified:
//
// Purpose: Hacky two port SRAM model.
// Purpose: Behavioral model of a two-port SRAM. While this is synthesizable, it will produce a flip-flop-based memory which
// behaves with the timing of an SRAM typical of GF 14nm, 32nm, and 45nm.
//
//
// to preload this memory we can use the following command
// in modelsim's do file.
// mem load -infile <relative path to the text file > -format <bin|hex> <hierarchy to the memory.>
// example
// mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
@ -30,7 +38,8 @@
module SRAM2P1R1W
#(parameter int Depth = 10,
parameter int Width = 2
)
)
(input clk,
// port 1 is read only
@ -45,16 +54,13 @@ module SRAM2P1R1W
input logic [Width-1:0] BitWEN1
);
logic [Depth-1:0] RA1Q, WA1Q;
logic WEN1Q;
logic [Width-1:0] WD1Q;
logic [2**Depth-1:0] [Width-1:0] memory;
logic [Width-1:0] memory [2**Depth-1:0];
// SRAMs address busses are always registered first.
@ -92,7 +98,7 @@ module SRAM2P1R1W
for (index = 0; index < Width; index = index + 1) begin
always_ff @ (posedge clk) begin
if (WEN1Q & BitWEN1[index]) begin
memory[WA1Q][index] = WD1Q[index];
memory[WA1Q][index] <= WD1Q[index];
end
end
end

View File

@ -58,7 +58,7 @@ module bpred
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic [`XLEN-1:0] BTBPredPCF, RASPCF, BTBPredPCMemoryF;
logic TargetWrongE;
logic FallThroughWrongE;
logic PredictionDirWrongE;
@ -71,19 +71,19 @@ module bpred
// This is probably too much logic.
// *** This also encourages me to switch to predicting the class.
assign InstrClassF[2] = InstrF[5:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return
assign InstrClassF[1] = InstrF[5:0] == 7'h6F; // jump
assign InstrClassF[0] = InstrF[5:0] == 7'h63; // branch
assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return
assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump
assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch
// Part 2 branch direction prediction
twoBitPredictor predictor(.clk(clk),
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0]),
.UpdatePrediction(UpdateBPPredE));
twoBitPredictor DirPredictor(.clk(clk),
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
.UpdateEN(InstrClassE[0]),
.UpdatePrediction(UpdateBPPredE));
// this predictor will have two pieces of data,
// 1) A direction (1 = Taken, 0 = Not Taken)
@ -99,7 +99,7 @@ module bpred
// Part 3 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets
BTBPredictor targetPredictor(.clk(clk),
BTBPredictor TargetPredictor(.clk(clk),
.reset(reset),
.LookUpPC(PCNextF),
.TargetPC(BTBPredPCMemoryF),
@ -111,7 +111,7 @@ module bpred
// need to forward when updating to the same address as reading.
assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
assign TargetPC = (UpdatePC == LookUpPC) ? CorrectPCE : BTBPredPCMemoryF;
assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCMemoryF;
// Part 4 RAS
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
@ -152,12 +152,12 @@ module bpred
.d(InstrClassF),
.q(InstrClassD));
flopenr #(4) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(flushD),
.d(InstrClassD),
.q(InstrClassE));
flopenrc #(4) InstrClassRegE(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(flushD),
.d(InstrClassD),
.q(InstrClassE));
// Check the prediction once the instruction reaches the execute stage.
assign TargetWrongE = PCTargetE != PCD;

View File

@ -27,29 +27,30 @@
`include "wally-config.vh"
module ifu (
input logic clk, reset,
input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW,
input logic clk, reset,
input logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW,
// Fetch
input logic [31:0] InstrF,
input logic [31:0] InstrF,
output logic [`XLEN-1:0] PCF,
output logic [`XLEN-1:0] InstrPAdrF,
// Decode
output logic InstrStall,
output logic InstrStall,
// Execute
input logic PCSrcE,
input logic [`XLEN-1:0] PCTargetE,
output logic [`XLEN-1:0] PCE,
input logic PCSrcE,
input logic [`XLEN-1:0] PCTargetE,
output logic [`XLEN-1:0] PCE,
output logic BPPredWrongE,
// Mem
input logic RetM, TrapM,
input logic [`XLEN-1:0] PrivilegedNextPCM,
output logic [31:0] InstrD, InstrM,
input logic RetM, TrapM,
input logic [`XLEN-1:0] PrivilegedNextPCM,
output logic [31:0] InstrD, InstrM,
output logic [`XLEN-1:0] PCM,
// Writeback
output logic [`XLEN-1:0] PCLinkW,
// Faults
input logic IllegalBaseInstrFaultD,
output logic IllegalIEUInstrFaultD,
output logic InstrMisalignedFaultM,
input logic IllegalBaseInstrFaultD,
output logic IllegalIEUInstrFaultD,
output logic InstrMisalignedFaultM,
output logic [`XLEN-1:0] InstrMisalignedAdrM
);
@ -62,6 +63,11 @@ module ifu (
logic [31:0] InstrRawD, InstrE;
logic [31:0] nop = 32'h00000013; // instruction for NOP
// branch predictor signals
logic SelBPPredF;
logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F;
// *** put memory interface on here, InstrF becomes output
assign InstrStall = 0; // ***
assign InstrPAdrF = PCF; // *** no MMU
@ -70,10 +76,49 @@ module ifu (
assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM);
mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF);
//mux3 #(`XLEN) pcmux(PCPlus2or4F, PCCorrectE, PrivilegedNextPCM, {PrivilegedChangePCM, BPPredWrongE}, UnalignedPCNextF);
mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F),
.d1(BPPredPCF),
.s(SelBPPredF),
.y(PCNext0F));
mux2 #(`XLEN) pcmux1(.d0(PCNext0F),
.d1(PCCorrectE),
.s(BPPredWrongE),
.y(PCNext1F));
mux2 #(`XLEN) pcmux2(.d0(PCNext1F),
.d1(PrivilegedNextPCM),
.s(PrivilegedChangePCM),
.y(UnalignedPCNextF));
assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment
flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF);
// branch and jump predictor
// I am making the port connection explicit for now as I want to see them and they will be changing.
bpred bpred(.clk(clk),
.reset(reset),
.StallF(StallF),
.StallD(StallD),
.StallE(1'b0), // *** may need this eventually
.FlushF(FlushF),
.FlushD(FlushD),
.FlushE(FlushE),
.PCNextF(PCNextF),
.BPPredPCF(BPPredPCF),
.SelBPPredF(SelBPPredF),
.InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here.
// Also I believe this port will be removed.
.PCE(PCE),
.PCSrcE(PCSrcE),
.PCTargetE(PCTargetE),
.PCD(PCD),
.PCLinkE(PCLinkE),
.BPPredWrongE(BPPredWrongE));
// The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE.
assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE;
// pcadder
// add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32
assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction?

View File

@ -40,6 +40,8 @@ module twoBitPredictor
);
logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex;
logic [1:0] PredictionMemory;
// hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input.
@ -50,10 +52,10 @@ module twoBitPredictor
SRAM2P1R1W #(Depth, 2) memory(.clk(clk),
.RA1(LookUpPC),
.RA1(LookUpPCIndex),
.RD1(PredictionMemory),
.REN1(1'b1),
.WA1(UpdatePC),
.WA1(UpdatePCIndex),
.WD1(UpdatePrediction),
.WEN1(UpdateEN),
.BitWEN1(2'b11));

View File

@ -49,7 +49,7 @@ module wallypipelinedhart (
);
logic [1:0] ForwardAE, ForwardBE;
logic StallF, StallD, FlushD, FlushE, FlushM, FlushW;
logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW;
logic RetM, TrapM;
// new signals that must connect through DP
@ -86,6 +86,8 @@ module wallypipelinedhart (
logic [`XLEN-1:0] InstrPAdrF;
logic DataStall, InstrStall;
logic InstrAckD, MemAckW;
logic BPPredWrongE;
ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache