We now have a solid rough draft of the 2 bit sat counter branch predictor with BTB and RAS.

This is not yet tested but the system verilog does compile.
This commit is contained in:
Ross Thompson 2021-02-15 14:51:39 -06:00
parent 935e9e59e9
commit ca546beaf8
6 changed files with 224 additions and 42 deletions

View File

@ -0,0 +1,81 @@
///////////////////////////////////////////
// BTBPredictor.sv
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: February 15, 2021
// Modified:
//
// Purpose: BTB model. Outputs type of instruction (currently 1 hot encoded. Probably want
// to encode to reduce storage), valid, target PC.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// BTBPredictor: branch target buffer.
//
// A 2**Depth entry table of target addresses, indexed by a hash of the PC,
// with one resettable valid bit per entry.
//
// Parameters:
//   Depth        - number of index bits; the table holds 2**Depth entries.
// Ports:
//   clk, reset   - clock and synchronous reset (reset clears every valid bit).
//   LookUpPC     - PC used to index the table for a prediction.
//   TargetPC     - target address read from the table for LookUpPC.
//   Valid        - valid bit of the entry selected by LookUpPC.
//   UpdateEN     - when high, the entry selected by UpdatePC is written.
//   UpdatePC     - PC used to index the table for the write.
//   UpdateTarget - target address stored on a write.
module BTBPredictor
  #(parameter int Depth = 10
    )
  (input  logic             clk,
   input  logic             reset,
   input  logic [`XLEN-1:0] LookUpPC,
   output logic [`XLEN-1:0] TargetPC,
   output logic             Valid,
   // update
   input  logic             UpdateEN,
   input  logic [`XLEN-1:0] UpdatePC,
   input  logic [`XLEN-1:0] UpdateTarget
   );

  localparam TotalDepth = 2 ** Depth;

  logic [TotalDepth-1:0] ValidBits;
  logic [Depth-1:0]      LookUpPCIndex, UpdatePCIndex;
  // Lookup index delayed by one cycle, used to select the valid bit.
  logic [Depth-1:0]      LookUpPCIndexQ;

  // hashing function for indexing the PC
  // We have Depth bits to index, but XLEN bits as the input.
  // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
  // using compressed instructions.  XOR bit 1 with the MSB of index.
  assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
  assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};

  // The valid bit must be resetable.
  always_ff @ (posedge clk) begin
    if (reset) begin
      ValidBits <= #1 {TotalDepth{1'b0}};
    end else if (UpdateEN) begin
      ValidBits[UpdatePCIndex] <= #1 1'b1;
    end
  end

  // Valid was previously left undriven.  The SRAM reads with a registered
  // address, so the lookup index is registered here as well to keep Valid
  // aligned with TargetPC.
  // NOTE(review): the SRAM write port also looks registered; confirm that Valid
  // cannot be observed one cycle before the matching target is actually written.
  always_ff @ (posedge clk) begin
    if (reset) begin
      LookUpPCIndexQ <= #1 {Depth{1'b0}};
    end else begin
      LookUpPCIndexQ <= #1 LookUpPCIndex;
    end
  end

  assign Valid = ValidBits[LookUpPCIndexQ];

  // the BTB contains the target address.
  // *** future version may contain the instruction class, a tag or partial tag,
  // and other indirection branch data.
  // Another optimization may be using a PC relative address.
  SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
                                    .RA1(LookUpPCIndex),
                                    .RD1(TargetPC),
                                    .REN1(1'b1),
                                    // was UpdatePCindex: a misspelling that created an implicit 1-bit net
                                    .WA1(UpdatePCIndex),
                                    .WD1(UpdateTarget),
                                    .WEN1(UpdateEN),
                                    // every bit of the word is written; XLEN is a macro and needs the backtick
                                    .BitWEN1({`XLEN{1'b1}}));

endmodule

View File

@ -0,0 +1,75 @@
///////////////////////////////////////////
// RASPredictor.sv
//
// Written: Ross Thompson
// Email: ross1728@gmail.com
// Created: February 15, 2021
// Modified:
//
// Purpose: Return address stack (RAS) predictor with parameterized stack size.
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
// RASPredictor: return address stack.
//
// Parameters:
//   StackSize  - number of entries in the stack.
//                NOTE(review): the pointer wraps modulo 2**$clog2(StackSize), so this
//                presumably needs to be a power of two greater than 1 - confirm.
// Ports:
//   clk, reset - clock and reset for the stack pointer register.
//   pop        - moves the stack pointer down by one.
//   popPC      - entry currently at the stack pointer (top of stack).
//   push       - writes pushPC to the entry above the pointer and moves the pointer up.
//   incr       - moves the stack pointer up by one without writing an entry.
//   pushPC     - address stored on a push.
module RASPredictor
  #(parameter int StackSize = 16
    )
  (input  logic             clk,
   input  logic             reset,
   input  logic             pop,
   output logic [`XLEN-1:0] popPC,
   input  logic             push,
   input  logic             incr,
   input  logic [`XLEN-1:0] pushPC
   );

  localparam Depth = $clog2(StackSize);

  logic                     CounterEn;
  // The pointer needs Depth = log2(StackSize) bits, not StackSize bits.  With the
  // wider pointer, overflow/underflow indexed outside of memory instead of wrapping.
  logic [Depth-1:0]         PtrD, PtrQ, PtrP1, PtrM1;
  logic [StackSize-1:0] [`XLEN-1:0] memory;

  assign CounterEn = pop | push | incr;

  // pop has priority: when pop is high the pointer moves down, otherwise up.
  assign PtrD = pop ? PtrM1 : PtrP1;

  assign PtrM1 = PtrQ - 1'b1;
  assign PtrP1 = PtrQ + 1'b1;
  // may have to handle a push and an incr at the same time.
  // *** what happens if jal is executing and there is a return being flushed in Decode?

  flopenr #(Depth) PTR(.clk(clk),
                       .reset(reset),
                       .en(CounterEn),
                       .d(PtrD),
                       .q(PtrQ));

  // A push stores into the slot above the current pointer; the pointer register
  // moves to that slot on the same edge (unless pop is also asserted).
  always_ff @ (posedge clk) begin
    if(push) begin
      memory[PtrP1] <= #1 pushPC;
    end
  end

  // The top of the stack is read combinationally.
  assign popPC = memory[PtrQ];

endmodule

View File

@ -41,7 +41,8 @@ module SRAM2P1R1W
// port 2 is write only
input logic [Depth-1:0] WA1,
input logic [Width-1:0] WD1,
input logic WEN1
input logic WEN1,
input logic [Width-1:0] BitWEN1
);
@ -83,13 +84,19 @@ module SRAM2P1R1W
.q(WD1Q));
// read port
assign RD1 = memory[RA1Q];
genvar index;
// write port
always_ff @ (posedge clk) begin
if (WEN1Q) begin
memory[WA1Q] = WD1Q;
generate
for (index = 0; index < Width; index = index + 1) begin
always_ff @ (posedge clk) begin
if (WEN1Q & BitWEN1[index]) begin
memory[WA1Q][index] = WD1Q[index];
end
end
end
end
endgenerate
endmodule

View File

@ -30,40 +30,40 @@
module bpred
(input logic clk, reset,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
// Fetch stage
// the prediction
input [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
output [`XLEN-1:0] BPPredPCF,
output SelBPPredF,
input [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
output logic [`XLEN-1:0] BPPredPCF,
output logic SelBPPredF,
input logic [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
// if this is too slow we will have to predict the type of instruction.
// Execute state
// Update Predictor
input [`XLEN-1:0] PCE, // The address of the currently executing instruction
input logic [`XLEN-1:0] PCE, // The address of the currently executing instruction
// 1 hot encoding
// return, jump register, jump, branch
// *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class.
// *** the specifics of how this is encoded are subject to change.
input PCSrcE, // AKA Branch Taken
input logic PCSrcE, // AKA Branch Taken
// Signals required to check the branch prediction accuracy.
input [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
input [`XLEN-1:0] PCD, // The address the branch predictor took.
input [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
// Report branch prediction status
output BPPredWrongE
output logic BPPredWrongE
);
logic BTBValidF;
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic BTBValidF;
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE;
logic FallThroughWrongE;
logic PredictionDirWrongE;
logic PredictionPCWrongE;
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE;
logic FallThroughWrongE;
logic PredictionDirWrongE;
logic PredictionPCWrongE;
logic [`XLEN-1:0] CorrectPCE;
// Part 1 decode the instruction class.
// *** for now I'm skipping the compressed instructions
@ -77,7 +77,8 @@ module bpred
// Part 2 branch direction prediction
twoBitPredictor predictor(.LookUpPC(PCNextF),
twoBitPredictor predictor(.clk(clk),
.LookUpPC(PCNextF),
.Prediction(BPPredF),
// update
.UpdatePC(PCE),
@ -89,29 +90,37 @@ module bpred
// 2) Any information which is necessary for the predictor to build its next state.
// For a 2 bit table this is the prediction count.
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1]) |
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) |
InstrClassF[3] |
(InstrClassF[2] & BTBValidF) |
InstrClassF[1]) ;
InstrClassF[1] & BTBValidF) ;
// Part 3 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets
BTBPredictor targetPredictor(.LookUpPC(PCNextF),
.TargetPC(BTBPredPCF),
BTBPredictor targetPredictor(.clk(clk),
.reset(reset),
.LookUpPC(PCNextF),
.TargetPC(BTBPredPCMemoryF),
.Valid(BTBValidF),
// update
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
.UpdatePC(PCE),
.UpdateTarget(PCTargetE));
// need to forward when updating to the same address as reading.
assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
assign TargetPC = (UpdatePC == LookUpPC) ? CorrectPCE : BTBPredPCMemoryF;
// Part 4 RAS
RASPredictor RASPredictor(.pop(InstrClassF[3]),
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
RASPredictor RASPredictor(.clk(clk),
.reset(reset),
.pop(InstrClassF[3]),
.popPC(RASPCF),
.push(InstrClassE[3]),
.incr(1'b0),
.pushPC(PCLinkE));
assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
@ -126,14 +135,14 @@ module bpred
.en(~StallF),
.clear(FlushF),
.d(BPPredF),
.Q(BPPredD));
.q(BPPredD));
flopenrc #(2) BPPredRegE(.clk(clk),
.reset(reset),
.en(~StallD),
.clear(FlushD),
.d(BPPredD),
.Q(BPPredE));
.q(BPPredE));
// pipeline the class
flopenrc #(4) InstrClassRegD(.clk(clk),

View File

@ -29,7 +29,6 @@
module satCounter2
(input logic BrDir,
input logic Decr,
input logic [1:0] OldState,
output logic [1:0] NewState
);

View File

@ -30,22 +30,33 @@
module twoBitPredictor
#(parameter int Depth = 10
)
(input clk,
input [`XLEN-1:0] LookUpPC,
output [1:0] Prediction,
(input logic clk,
input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] Prediction,
// update
input [`XLEN-1:0] UpdatePC,
input UpdateEN,
input [1:0] UpdatePrediction
input logic [`XLEN-1:0] UpdatePC,
input logic UpdateEN,
input logic [1:0] UpdatePrediction
);
logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex;
// hashing function for indexing the PC
// We have Depth bits to index, but XLEN bits as the input.
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
// using compressed instructions. XOR bit 1 with the MSB of index.
assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};
SRAM2P1R1W #(Depth, 2) memory(.clk(clk),
.RA1(LookUpPC),
.RD1(PredictionMemory),
.REN1(1'b1),
.WA1(UpdatePC),
.WD1(UpdatePrediction),
.WEN1(UpdateEN));
.WEN1(UpdateEN),
.BitWEN1(2'b11));
// need to forward when updating to the same address as reading.
assign Prediction = (UpdatePC == LookUpPC) ? UpdatePrediction : PredictionMemory;