diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index b7f41535..60046294 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -38,6 +38,11 @@ switch $argc { vopt +acc work.testbench -o workopt vsim workopt +# load the branch predictors with known data. The value of the data is not important for function, but +# is important for preventing pessimistic x propagation. +mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory +mem load -infile BTBPredictor.txt -format bin testbench/dut/hart/ifu/bpred/TargetPredictor/memory/memory + view wave -- display input and output signals as hexidecimal values diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 2fe0541a..7eb116eb 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -30,7 +30,7 @@ module hazard( // input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // input logic MemReadE, // input logic RegWriteM, RegWriteW, - input logic PCSrcE, CSRWritePendingDEM, RetM, TrapM, + input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, input logic InstrStall, DataStall, // Stall outputs @@ -52,7 +52,7 @@ module hazard( // A stage must stall if the next stage is stalled // If any stages are stalled, the first stage that isn't stalled must flush. 
- assign BranchFlushDE = PCSrcE | RetM | TrapM; + assign BranchFlushDE = BPPredWrongE | RetM | TrapM; assign StallDCause = LoadStallD; assign StallFCause = InstrStall | CSRWritePendingDEM; @@ -60,6 +60,7 @@ module hazard( assign StallD = StallDCause; assign StallF = StallD | StallFCause; + assign FlushF = BPPredWrongE; assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; assign FlushM = RetM | TrapM; diff --git a/wally-pipelined/src/ifu/BTBPredictor.sv b/wally-pipelined/src/ifu/BTBPredictor.sv index 86ff3778..8e9723ec 100644 --- a/wally-pipelined/src/ifu/BTBPredictor.sv +++ b/wally-pipelined/src/ifu/BTBPredictor.sv @@ -72,10 +72,10 @@ module BTBPredictor .RA1(LookUpPCIndex), .RD1(TargetPC), .REN1(1'b1), - .WA1(UpdatePCindex), + .WA1(UpdatePCIndex), .WD1(UpdateTarget), .WEN1(UpdateEN), - .BitWEN1({XLEN{1'b1}})); + .BitWEN1({`XLEN{1'b1}})); endmodule diff --git a/wally-pipelined/src/ifu/SramModel.sv b/wally-pipelined/src/ifu/SramModel.sv index d715d826..15d5699e 100644 --- a/wally-pipelined/src/ifu/SramModel.sv +++ b/wally-pipelined/src/ifu/SramModel.sv @@ -6,8 +6,16 @@ // Created: February 14, 2021 // Modified: // -// Purpose: Hacky two port SRAM model. +// Purpose: Behavioral model of two port SRAM. While this is synthesizable it will produce a flip flop based memory which +// behaves with the timing of an SRAM typical of GF 14nm, 32nm, and 45nm. +// // +// to preload this memory we can use the following command +// in modelsim's do file. +// mem load -infile <file> -format <bin|hex> <path to memory> +// example +// mem load -infile twoBitPredictor.txt -format bin testbench/dut/hart/ifu/bpred/DirPredictor/memory/memory +// // A component of the Wally configurable RISC-V project. 
// // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University @@ -30,7 +38,8 @@ module SRAM2P1R1W #(parameter int Depth = 10, parameter int Width = 2 - ) + ) + (input clk, // port 1 is read only @@ -45,16 +54,13 @@ module SRAM2P1R1W input logic [Width-1:0] BitWEN1 ); - - - logic [Depth-1:0] RA1Q, WA1Q; logic WEN1Q; logic [Width-1:0] WD1Q; - logic [2**Depth-1:0] [Width-1:0] memory; - + logic [Width-1:0] memory [2**Depth-1:0]; + // SRAMs address busses are always registered first. @@ -92,7 +98,7 @@ module SRAM2P1R1W for (index = 0; index < Width; index = index + 1) begin always_ff @ (posedge clk) begin if (WEN1Q & BitWEN1[index]) begin - memory[WA1Q][index] = WD1Q[index]; + memory[WA1Q][index] <= WD1Q[index]; end end end diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index d1c0bfb7..d0a44d88 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -58,7 +58,7 @@ module bpred logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE; logic [3:0] InstrClassD, InstrClassF, InstrClassE; - logic [`XLEN-1:0] BTBPredPCF, RASPCF; + logic [`XLEN-1:0] BTBPredPCF, RASPCF, BTBPredPCMemoryF; logic TargetWrongE; logic FallThroughWrongE; logic PredictionDirWrongE; @@ -71,19 +71,19 @@ module bpred // This is probably too much logic. // *** This also encourages me to switch to predicting the class. 
- assign InstrClassF[2] = InstrF[5:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return - assign InstrClassF[1] = InstrF[5:0] == 7'h6F; // jump - assign InstrClassF[0] = InstrF[5:0] == 7'h63; // branch + assign InstrClassF[2] = InstrF[6:0] == 7'h67 && InstrF[19:15] == 5'h01; // jump register, but not return + assign InstrClassF[1] = InstrF[6:0] == 7'h6F; // jump + assign InstrClassF[0] = InstrF[6:0] == 7'h63; // branch // Part 2 branch direction prediction - twoBitPredictor predictor(.clk(clk), - .LookUpPC(PCNextF), - .Prediction(BPPredF), - // update - .UpdatePC(PCE), - .UpdateEN(InstrClassE[0]), - .UpdatePrediction(UpdateBPPredE)); + twoBitPredictor DirPredictor(.clk(clk), + .LookUpPC(PCNextF), + .Prediction(BPPredF), + // update + .UpdatePC(PCE), + .UpdateEN(InstrClassE[0]), + .UpdatePrediction(UpdateBPPredE)); // this predictor will have two pieces of data, // 1) A direction (1 = Taken, 0 = Not Taken) @@ -99,7 +99,7 @@ module bpred // Part 3 Branch target address prediction // *** For now the BTB will house the direct and indirect targets - BTBPredictor targetPredictor(.clk(clk), + BTBPredictor TargetPredictor(.clk(clk), .reset(reset), .LookUpPC(PCNextF), .TargetPC(BTBPredPCMemoryF), @@ -111,7 +111,7 @@ module bpred // need to forward when updating to the same address as reading. assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE; - assign TargetPC = (UpdatePC == LookUpPC) ? CorrectPCE : BTBPredPCMemoryF; + assign TargetPC = (PCE == PCNextF) ? CorrectPCE : BTBPredPCMemoryF; // Part 4 RAS // *** need to add the logic to restore RAS on flushes. We will use incr for this. @@ -152,12 +152,12 @@ module bpred .d(InstrClassF), .q(InstrClassD)); - flopenr #(4) InstrClassRegE(.clk(clk), - .reset(reset), - .en(~StallD), - .clear(flushD), - .d(InstrClassD), - .q(InstrClassE)); + flopenrc #(4) InstrClassRegE(.clk(clk), + .reset(reset), + .en(~StallD), + .clear(flushD), + .d(InstrClassD), + .q(InstrClassE)); // Check the prediction makes execution. 
assign TargetWrongE = PCTargetE != PCD; diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 3a12b330..2824efb5 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -27,29 +27,30 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW, // Fetch - input logic [31:0] InstrF, + input logic [31:0] InstrF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, // Decode - output logic InstrStall, + output logic InstrStall, // Execute - input logic PCSrcE, - input logic [`XLEN-1:0] PCTargetE, - output logic [`XLEN-1:0] PCE, + input logic PCSrcE, + input logic [`XLEN-1:0] PCTargetE, + output logic [`XLEN-1:0] PCE, + output logic BPPredWrongE, // Mem - input logic RetM, TrapM, - input logic [`XLEN-1:0] PrivilegedNextPCM, - output logic [31:0] InstrD, InstrM, + input logic RetM, TrapM, + input logic [`XLEN-1:0] PrivilegedNextPCM, + output logic [31:0] InstrD, InstrM, output logic [`XLEN-1:0] PCM, // Writeback output logic [`XLEN-1:0] PCLinkW, // Faults - input logic IllegalBaseInstrFaultD, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, + input logic IllegalBaseInstrFaultD, + output logic IllegalIEUInstrFaultD, + output logic InstrMisalignedFaultM, output logic [`XLEN-1:0] InstrMisalignedAdrM ); @@ -62,6 +63,11 @@ module ifu ( logic [31:0] InstrRawD, InstrE; logic [31:0] nop = 32'h00000013; // instruction for NOP + // branch predictor signals + logic SelBPPredF; + logic [`XLEN-1:0] BPPredPCF, PCCorrectE, PCNext0F, PCNext1F; + + // *** put memory interface on here, InstrF becomes output assign InstrStall = 0; // *** assign InstrPAdrF = PCF; // *** no MMU @@ -70,10 +76,49 @@ module ifu ( assign StallExceptResolveBranchesF = StallF & ~(PCSrcE | PrivilegedChangePCM); - mux3 #(`XLEN) pcmux(PCPlus2or4F, PCTargetE, 
PrivilegedNextPCM, {PrivilegedChangePCM, PCSrcE}, UnalignedPCNextF); + //mux3 #(`XLEN) pcmux(PCPlus2or4F, PCCorrectE, PrivilegedNextPCM, {PrivilegedChangePCM, BPPredWrongE}, UnalignedPCNextF); + mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), + .d1(BPPredPCF), + .s(SelBPPredF), + .y(PCNext0F)); + + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), + .d1(PCCorrectE), + .s(BPPredWrongE), + .y(PCNext1F)); + + mux2 #(`XLEN) pcmux2(.d0(PCNext1F), + .d1(PrivilegedNextPCM), + .s(PrivilegedChangePCM), + .y(UnalignedPCNextF)); + assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallExceptResolveBranchesF, PCNextF, `RESET_VECTOR, PCF); + // branch and jump predictor + // I am making the port connection explicit for now as I want to see them and they will be changing. + bpred bpred(.clk(clk), + .reset(reset), + .StallF(StallF), + .StallD(StallD), + .StallE(1'b0), // *** may need this eventually + .FlushF(FlushF), + .FlushD(FlushD), + .FlushE(FlushE), + .PCNextF(PCNextF), + .BPPredPCF(BPPredPCF), + .SelBPPredF(SelBPPredF), + .InstrF(InstrF), // *** this is flushed internally. The logic is redundant with some out here. + // Also I believe this port will be removed. + .PCE(PCE), + .PCSrcE(PCSrcE), + .PCTargetE(PCTargetE), + .PCD(PCD), + .PCLinkE(PCLinkE), + .BPPredWrongE(BPPredWrongE)); + // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE. + assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE; + // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 assign CompressedF = (InstrF[1:0] != 2'b11); // is it a 16-bit compressed instruction? 
diff --git a/wally-pipelined/src/ifu/twoBitPredictor.sv b/wally-pipelined/src/ifu/twoBitPredictor.sv index 703312f5..34e46b60 100644 --- a/wally-pipelined/src/ifu/twoBitPredictor.sv +++ b/wally-pipelined/src/ifu/twoBitPredictor.sv @@ -40,6 +40,8 @@ module twoBitPredictor ); logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex; + logic [1:0] PredictionMemory; + // hashing function for indexing the PC // We have Depth bits to index, but XLEN bits as the input. @@ -50,10 +52,10 @@ module twoBitPredictor SRAM2P1R1W #(Depth, 2) memory(.clk(clk), - .RA1(LookUpPC), + .RA1(LookUpPCIndex), .RD1(PredictionMemory), .REN1(1'b1), - .WA1(UpdatePC), + .WA1(UpdatePCIndex), .WD1(UpdatePrediction), .WEN1(UpdateEN), .BitWEN1(2'b11)); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 408045e2..3753c3b1 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -49,7 +49,7 @@ module wallypipelinedhart ( ); logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, FlushD, FlushE, FlushM, FlushW; + logic StallF, StallD, FlushF, FlushD, FlushE, FlushM, FlushW; logic RetM, TrapM; // new signals that must connect through DP @@ -86,6 +86,8 @@ module wallypipelinedhart ( logic [`XLEN-1:0] InstrPAdrF; logic DataStall, InstrStall; logic InstrAckD, MemAckW; + logic BPPredWrongE; + ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache