mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
We now have a solid rough draft of the 2 bit sat counter branch predictor with BTB and RAS.
This is not yet tested, but the SystemVerilog does compile.
This commit is contained in:
parent
935e9e59e9
commit
ca546beaf8
81
wally-pipelined/src/ifu/BTBPredictor.sv
Normal file
81
wally-pipelined/src/ifu/BTBPredictor.sv
Normal file
@ -0,0 +1,81 @@
|
||||
///////////////////////////////////////////
|
||||
// BTBPredictor.sv
|
||||
//
|
||||
// Written: Ross Thompson
|
||||
// Email: ross1728@gmail.com
|
||||
// Created: February 15, 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: BTB model. Outputs type of instruction (currently 1 hot encoded. Probably want
|
||||
// to encode to reduce storage), valid, target PC.
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// BTBPredictor
//
// Branch target buffer: a 2**Depth entry table of XLEN-bit target addresses
// with one resettable valid bit per entry.
//
// Parameters:
//   Depth        - number of index bits; the table holds 2**Depth entries.
// Lookup port:
//   LookUpPC     - PC used to index the table.
//   TargetPC     - target address read from the table for LookUpPC.
//   Valid        - high when the looked-up entry has been written since reset.
// Update port:
//   UpdateEN     - write UpdateTarget into the entry selected by UpdatePC and
//                  mark that entry valid.
//   UpdatePC     - PC of the instruction whose target is being recorded.
//   UpdateTarget - target address to store.
module BTBPredictor
  #(parameter int Depth = 10
    )
  (input  logic             clk,
   input  logic             reset,
   // lookup
   input  logic [`XLEN-1:0] LookUpPC,
   output logic [`XLEN-1:0] TargetPC,
   output logic             Valid,
   // update
   input  logic             UpdateEN,
   input  logic [`XLEN-1:0] UpdatePC,
   input  logic [`XLEN-1:0] UpdateTarget
   );

  localparam TotalDepth = 2 ** Depth;

  logic [TotalDepth-1:0] ValidBits;
  logic [Depth-1:0]      LookUpPCIndex, UpdatePCIndex;
  logic [Depth-1:0]      LookUpPCIndexQ;

  // hashing function for indexing the PC
  // We have Depth bits to index, but XLEN bits as the input.
  // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
  // using compressed instructions.  XOR bit 1 with the MSB of index.
  assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
  assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};

  // The valid bits must be resettable.
  // An entry becomes valid on its first update and is only cleared by reset.
  always_ff @ (posedge clk) begin
    if (reset) begin
      ValidBits <= #1 {TotalDepth{1'b0}};
    end else if (UpdateEN) begin
      ValidBits[UpdatePCIndex] <= #1 1'b1;
    end
  end

  // Drive the Valid output (it was previously left unconnected).
  // The lookup index is registered so that Valid lines up with TargetPC.
  // NOTE(review): this assumes SRAM2P1R1W registers its read address
  // (RD1 = memory[RA1Q]), giving a one cycle read latency -- confirm, and
  // read ValidBits[LookUpPCIndex] directly if the memory read is combinational.
  always_ff @ (posedge clk) begin
    LookUpPCIndexQ <= #1 LookUpPCIndex;
  end

  assign Valid = ValidBits[LookUpPCIndexQ];

  // the BTB contains the target address.
  // *** future version may contain the instruction class, a tag or partial tag,
  // and other indirect branch data.
  // Another optimization may be using a PC relative address.

  SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
                                    .RA1(LookUpPCIndex),
                                    .RD1(TargetPC),
                                    .REN1(1'b1),
                                    // identifiers are case sensitive: this must be UpdatePCIndex,
                                    // otherwise an undriven implicit net feeds the write address.
                                    .WA1(UpdatePCIndex),
                                    .WD1(UpdateTarget),
                                    .WEN1(UpdateEN),
                                    // always write every bit of the entry; XLEN is a macro
                                    // and needs the backtick.
                                    .BitWEN1({`XLEN{1'b1}}));

endmodule
|
75
wally-pipelined/src/ifu/RAsPredictor.sv
Normal file
75
wally-pipelined/src/ifu/RAsPredictor.sv
Normal file
@ -0,0 +1,75 @@
|
||||
///////////////////////////////////////////
|
||||
// RASPredictor.sv
|
||||
//
|
||||
// Written: Ross Thompson
|
||||
// Email: ross1728@gmail.com
|
||||
// Created: February 15, 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Return address stack (RAS) predictor with parameterized stack size.
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
// RASPredictor
//
// Return address stack: a StackSize entry circular buffer of XLEN-bit
// addresses addressed by a single stack pointer.
//
// Parameters:
//   StackSize - number of entries.
//               NOTE(review): presumably a power of two so that the
//               $clog2(StackSize)-bit pointer wraps onto valid entries -- confirm.
// Ports:
//   pop    - move the stack pointer down by one entry.
//   popPC  - entry currently addressed by the stack pointer (memory[PtrQ]).
//   push   - write pushPC into the entry above the stack pointer and move the
//            pointer up by one entry.
//   incr   - move the stack pointer up by one entry without writing.
//   pushPC - address stored on a push.
//
// When pop is asserted together with push or incr, the pointer decrements
// (pop has priority in the pointer mux) while a push still writes memory.
module RASPredictor
  #(parameter int StackSize = 16
    )
  (input  logic             clk,
   input  logic             reset,
   input  logic             pop,
   output logic [`XLEN-1:0] popPC,
   input  logic             push,
   input  logic             incr,
   input  logic [`XLEN-1:0] pushPC
   );

  localparam Depth = $clog2(StackSize);

  logic                             CounterEn;
  // The pointer only needs Depth bits to address StackSize entries.  Declaring
  // it StackSize bits wide let it count past the end of memory instead of
  // wrapping around.
  logic [Depth-1:0]                 PtrD, PtrQ, PtrP1, PtrM1;
  logic [StackSize-1:0] [`XLEN-1:0] memory;

  // the pointer moves on any stack operation
  assign CounterEn = pop | push | incr;

  assign PtrM1 = PtrQ - 1'b1;
  assign PtrP1 = PtrQ + 1'b1;

  // pop decrements; push and incr both increment.
  assign PtrD = pop ? PtrM1 : PtrP1;

  // may have to handle a push and an incr at the same time.
  // *** what happens if jal is executing and there is a return being flushed in Decode?

  flopenr #(Depth) PTR(.clk(clk),
                       .reset(reset),
                       .en(CounterEn),
                       .d(PtrD),
                       .q(PtrQ));

  // a push writes the slot the pointer is about to move to, so that after the
  // pointer update memory[PtrQ] is the value just pushed.
  always_ff @ (posedge clk) begin
    if (push) begin
      memory[PtrP1] <= #1 pushPC;
    end
  end

  // top of stack
  assign popPC = memory[PtrQ];

endmodule
|
||||
|
||||
|
||||
|
@ -41,7 +41,8 @@ module SRAM2P1R1W
|
||||
// port 2 is write only
|
||||
input logic [Depth-1:0] WA1,
|
||||
input logic [Width-1:0] WD1,
|
||||
input logic WEN1
|
||||
input logic WEN1,
|
||||
input logic [Width-1:0] BitWEN1
|
||||
);
|
||||
|
||||
|
||||
@ -83,13 +84,19 @@ module SRAM2P1R1W
|
||||
.q(WD1Q));
|
||||
// read port
|
||||
assign RD1 = memory[RA1Q];
|
||||
|
||||
genvar index;
|
||||
|
||||
// write port
|
||||
always_ff @ (posedge clk) begin
|
||||
if (WEN1Q) begin
|
||||
memory[WA1Q] = WD1Q;
|
||||
generate
|
||||
for (index = 0; index < Width; index = index + 1) begin
|
||||
always_ff @ (posedge clk) begin
|
||||
if (WEN1Q & BitWEN1[index]) begin
|
||||
memory[WA1Q][index] = WD1Q[index];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
||||
|
||||
|
@ -30,40 +30,40 @@
|
||||
|
||||
module bpred
|
||||
(input logic clk, reset,
|
||||
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
|
||||
input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
|
||||
// Fetch stage
|
||||
// the prediction
|
||||
input [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
|
||||
output [`XLEN-1:0] BPPredPCF,
|
||||
output SelBPPredF,
|
||||
input [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
|
||||
input logic [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
|
||||
output logic [`XLEN-1:0] BPPredPCF,
|
||||
output logic SelBPPredF,
|
||||
input logic [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
|
||||
// if this is too slow we will have to predict the type of instruction.
|
||||
// Execute stage
|
||||
// Update Predictor
|
||||
input [`XLEN-1:0] PCE, // The address of the currently executing instruction
|
||||
input logic [`XLEN-1:0] PCE, // The address of the currently executing instruction
|
||||
// 1 hot encoding
|
||||
// return, jump register, jump, branch
|
||||
// *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class.
|
||||
// *** the specifics of how this is encoded are subject to change.
|
||||
input PCSrcE, // AKA Branch Taken
|
||||
input logic PCSrcE, // AKA Branch Taken
|
||||
// Signals required to check the branch prediction accuracy.
|
||||
input [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
|
||||
input [`XLEN-1:0] PCD, // The address the branch predictor took.
|
||||
input [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
|
||||
input logic [`XLEN-1:0] PCD, // The address the branch predictor took.
|
||||
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
|
||||
// Report branch prediction status
|
||||
output BPPredWrongE
|
||||
output logic BPPredWrongE
|
||||
);
|
||||
|
||||
logic BTBValidF;
|
||||
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
|
||||
logic BTBValidF;
|
||||
logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
|
||||
|
||||
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
|
||||
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
||||
logic TargetWrongE;
|
||||
logic FallThroughWrongE;
|
||||
logic PredictionDirWrongE;
|
||||
logic PredictionPCWrongE;
|
||||
|
||||
logic [3:0] InstrClassD, InstrClassF, InstrClassE;
|
||||
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
|
||||
logic TargetWrongE;
|
||||
logic FallThroughWrongE;
|
||||
logic PredictionDirWrongE;
|
||||
logic PredictionPCWrongE;
|
||||
logic [`XLEN-1:0] CorrectPCE;
|
||||
|
||||
// Part 1 decode the instruction class.
|
||||
// *** for now I'm skipping the compressed instructions
|
||||
@ -77,7 +77,8 @@ module bpred
|
||||
|
||||
// Part 2 branch direction prediction
|
||||
|
||||
twoBitPredictor predictor(.LookUpPC(PCNextF),
|
||||
twoBitPredictor predictor(.clk(clk),
|
||||
.LookUpPC(PCNextF),
|
||||
.Prediction(BPPredF),
|
||||
// update
|
||||
.UpdatePC(PCE),
|
||||
@ -89,29 +90,37 @@ module bpred
|
||||
// 2) Any information which is necessary for the predictor to build its next state.
|
||||
// For a 2 bit table this is the prediction count.
|
||||
|
||||
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1]) |
|
||||
assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) |
|
||||
InstrClassF[3] |
|
||||
(InstrClassF[2] & BTBValidF) |
|
||||
InstrClassF[1]) ;
|
||||
InstrClassF[1] & BTBValidF) ;
|
||||
|
||||
|
||||
// Part 3 Branch target address prediction
|
||||
// *** For now the BTB will house the direct and indirect targets
|
||||
|
||||
BTBPredictor targetPredictor(.LookUpPC(PCNextF),
|
||||
.TargetPC(BTBPredPCF),
|
||||
BTBPredictor targetPredictor(.clk(clk),
|
||||
.reset(reset),
|
||||
.LookUpPC(PCNextF),
|
||||
.TargetPC(BTBPredPCMemoryF),
|
||||
.Valid(BTBValidF),
|
||||
// update
|
||||
.UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
|
||||
.UpdatePC(PCE),
|
||||
.UpdateTarget(PCTargetE));
|
||||
|
||||
// need to forward when updating to the same address as reading.
|
||||
assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
|
||||
assign TargetPC = (UpdatePC == LookUpPC) ? CorrectPCE : BTBPredPCMemoryF;
|
||||
|
||||
// Part 4 RAS
|
||||
|
||||
RASPredictor RASPredictor(.pop(InstrClassF[3]),
|
||||
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
|
||||
RASPredictor RASPredictor(.clk(clk),
|
||||
.reset(reset),
|
||||
.pop(InstrClassF[3]),
|
||||
.popPC(RASPCF),
|
||||
.push(InstrClassE[3]),
|
||||
.incr(1'b0),
|
||||
.pushPC(PCLinkE));
|
||||
|
||||
assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
|
||||
@ -126,14 +135,14 @@ module bpred
|
||||
.en(~StallF),
|
||||
.clear(FlushF),
|
||||
.d(BPPredF),
|
||||
.Q(BPPredD));
|
||||
.q(BPPredD));
|
||||
|
||||
flopenrc #(2) BPPredRegE(.clk(clk),
|
||||
.reset(reset),
|
||||
.en(~StallD),
|
||||
.clear(FlushD),
|
||||
.d(BPPredD),
|
||||
.Q(BPPredE));
|
||||
.q(BPPredE));
|
||||
|
||||
// pipeline the class
|
||||
flopenrc #(4) InstrClassRegD(.clk(clk),
|
||||
|
@ -29,7 +29,6 @@
|
||||
|
||||
module satCounter2
|
||||
(input logic BrDir,
|
||||
input logic Decr,
|
||||
input logic [1:0] OldState,
|
||||
output logic [1:0] NewState
|
||||
);
|
||||
|
@ -30,22 +30,33 @@
|
||||
module twoBitPredictor
|
||||
#(parameter int Depth = 10
|
||||
)
|
||||
(input clk,
|
||||
input [`XLEN-1:0] LookUpPC,
|
||||
output [1:0] Prediction,
|
||||
(input logic clk,
|
||||
input logic [`XLEN-1:0] LookUpPC,
|
||||
output logic [1:0] Prediction,
|
||||
// update
|
||||
input [`XLEN-1:0] UpdatePC,
|
||||
input UpdateEN,
|
||||
input [1:0] UpdatePrediction
|
||||
input logic [`XLEN-1:0] UpdatePC,
|
||||
input logic UpdateEN,
|
||||
input logic [1:0] UpdatePrediction
|
||||
);
|
||||
|
||||
logic [Depth-1:0] LookUpPCIndex, UpdatePCIndex;
|
||||
|
||||
// hashing function for indexing the PC
|
||||
// We have Depth bits to index, but XLEN bits as the input.
|
||||
// bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
|
||||
// using compressed instructions. XOR bit 1 with the MSB of index.
|
||||
assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
|
||||
assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};
|
||||
|
||||
|
||||
SRAM2P1R1W #(Depth, 2) memory(.clk(clk),
|
||||
.RA1(LookUpPC),
|
||||
.RD1(PredictionMemory),
|
||||
.REN1(1'b1),
|
||||
.WA1(UpdatePC),
|
||||
.WD1(UpdatePrediction),
|
||||
.WEN1(UpdateEN));
|
||||
.WEN1(UpdateEN),
|
||||
.BitWEN1(2'b11));
|
||||
|
||||
// need to forward when updating to the same address as reading.
|
||||
assign Prediction = (UpdatePC == LookUpPC) ? UpdatePrediction : PredictionMemory;
|
||||
|
Loading…
Reference in New Issue
Block a user