diff --git a/wally-pipelined/src/ifu/BTBPredictor.sv b/wally-pipelined/src/ifu/BTBPredictor.sv
new file mode 100644
index 00000000..86ff3778
--- /dev/null
+++ b/wally-pipelined/src/ifu/BTBPredictor.sv
@@ -0,0 +1,101 @@
+///////////////////////////////////////////
+// BTBPredictor.sv
+//
+// Written: Ross Thompson
+// Email: ross1728@gmail.com
+// Created: February 15, 2021
+// Modified:
+//
+// Purpose: BTB model. Outputs valid and target PC. A future version may also output the type of
+//          instruction (1 hot encoded; probably want to encode to reduce storage).
+//
+// A component of the Wally configurable RISC-V project.
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// Branch target buffer: 2**Depth target PCs in an SRAM2P1R1W plus one resettable valid bit per entry.
+//   LookUpPC     : PC to look up; hashed down to a Depth-bit index.
+//   TargetPC     : SRAM read data for the looked-up index.
+//   Valid        : valid bit of the looked-up entry.
+//   UpdateEN     : store UpdateTarget in the entry selected by UpdatePC and mark it valid.
+module BTBPredictor
+  #(parameter int Depth = 10
+    )
+  (input  logic             clk,
+   input  logic             reset,
+   input  logic [`XLEN-1:0] LookUpPC,
+   output logic [`XLEN-1:0] TargetPC,
+   output logic             Valid,
+   // update
+   input  logic             UpdateEN,
+   input  logic [`XLEN-1:0] UpdatePC,
+   input  logic [`XLEN-1:0] UpdateTarget
+   );
+
+  localparam TotalDepth = 2 ** Depth;
+  logic [TotalDepth-1:0]    ValidBits;
+  logic [Depth-1:0]         LookUpPCIndex, UpdatePCIndex;
+  logic [Depth-1:0]         LookUpPCIndexQ;
+
+  // hashing function for indexing the PC
+  // We have Depth bits to index, but XLEN bits as the input.
+  // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
+  // using compressed instructions. XOR bit 1 with the MSB of index.
+  assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
+  assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};
+
+
+  // The valid bit must be resettable.
+  // NOTE(review): SRAM2P1R1W appears to flop its write port (WEN1Q/WA1Q/WD1Q), so a valid bit
+  // may be set one clock before the target is actually written -- confirm.
+  always_ff @ (posedge clk) begin
+    if (reset) begin
+      ValidBits <= #1 {TotalDepth{1'b0}};
+    end else if (UpdateEN) begin
+      ValidBits[UpdatePCIndex] <= #1 1'b1;
+    end
+  end
+
+  // Valid has to describe the same entry as TargetPC. SRAM2P1R1W reads memory[RA1Q], so
+  // the look up index is flopped before it selects the valid bit.
+  // NOTE(review): assumes RA1Q is RA1 delayed by one clock (REN1 is tied high) -- confirm
+  // against SramModel.sv.
+  always_ff @ (posedge clk) begin
+    LookUpPCIndexQ <= #1 LookUpPCIndex;
+  end
+
+  assign Valid = ValidBits[LookUpPCIndexQ];
+
+  // the BTB contains the target address.
+  // *** future version may contain the instruction class, a tag or partial tag,
+  // and other indirection branch data.
+  // Another optimization may be using a PC relative address.
+
+  // The SRAM address ports take the hashed Depth-bit indices. BitWEN1 is all ones because
+  // an update always writes a whole target.
+  SRAM2P1R1W #(Depth, `XLEN) memory(.clk(clk),
+                                    .RA1(LookUpPCIndex),
+                                    .RD1(TargetPC),
+                                    .REN1(1'b1),
+                                    .WA1(UpdatePCIndex),
+                                    .WD1(UpdateTarget),
+                                    .WEN1(UpdateEN),
+                                    .BitWEN1({`XLEN{1'b1}}));
+
+
+endmodule
diff --git a/wally-pipelined/src/ifu/RAsPredictor.sv b/wally-pipelined/src/ifu/RAsPredictor.sv
new file mode 100644
index 00000000..d985209b
--- /dev/null
+++ b/wally-pipelined/src/ifu/RAsPredictor.sv
@@ -0,0 +1,75 @@
+///////////////////////////////////////////
+// RASPredictor.sv
+//
+// Written: Ross Thompson
+// Email: ross1728@gmail.com
+// Created: February 15, 2021
+// Modified:
+//
+// Purpose: RAS (return address stack) predictor. Push/pop stack of PCs with parameterized stack size.
+//
+// A component of the Wally configurable RISC-V project.
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+// Stack of StackSize PCs: a push stores pushPC in the slot above the pointer, popPC reads the slot at the pointer.
+module RASPredictor
+  #(parameter int StackSize = 16
+    )
+  (input  logic             clk,
+   input  logic             reset,
+   input  logic             pop,
+   output logic [`XLEN-1:0] popPC,
+   input  logic             push,
+   input  logic             incr,
+   input  logic [`XLEN-1:0] pushPC
+   );
+
+  logic                     CounterEn;
+  localparam Depth = $clog2(StackSize);
+  // The pointer indexes memory, so it is Depth bits wide (a StackSize-bit pointer runs past the last entry).
+  logic [Depth-1:0]         PtrD, PtrQ, PtrP1, PtrM1;
+  logic [StackSize-1:0] [`XLEN-1:0] memory;
+
+  assign CounterEn = pop | push | incr;
+  // pop steps the pointer down; otherwise an enabled update (push or incr) steps it up.
+  assign PtrD = pop ? PtrM1 : PtrP1;
+
+  assign PtrM1 = PtrQ - 1'b1;
+  assign PtrP1 = PtrQ + 1'b1;
+  // may have to handle a push and an incr at the same time.
+  // *** what happens if jal is executing and there is a return being flushed in Decode?
+
+  flopenr #(Depth) PTR(.clk(clk),
+                       .reset(reset),
+                       .en(CounterEn),
+                       .d(PtrD),
+                       .q(PtrQ));
+
+  always_ff @ (posedge clk) begin
+    if(push) begin
+      memory[PtrP1] <= #1 pushPC;
+    end
+  end
+
+  assign popPC = memory[PtrQ];
+
+
+endmodule
+
+
+
diff --git a/wally-pipelined/src/ifu/SramModel.sv b/wally-pipelined/src/ifu/SramModel.sv
index 926af02f..d715d826 100644
--- a/wally-pipelined/src/ifu/SramModel.sv
+++ b/wally-pipelined/src/ifu/SramModel.sv
@@ -41,7 +41,8 @@ module SRAM2P1R1W
    // port 2 is write only
    input logic [Depth-1:0] WA1,
    input logic [Width-1:0] WD1,
-   input logic WEN1
+   input logic             WEN1,
+   input logic [Width-1:0] BitWEN1
    );
@@ -83,13 +84,20 @@ module SRAM2P1R1W
              .q(WD1Q));
   // read port
   assign RD1 = memory[RA1Q];
   // write port
   always_ff @ (posedge clk) begin
     if (WEN1Q) begin
-      memory[WA1Q] = WD1Q;
+      // A single always_ff owns memory: a generate loop of per-bit always_ff blocks would have
+      // several processes writing the same variable. Nonblocking writes avoid a read/write race.
+      // NOTE(review): BitWEN1 is used directly while WEN1Q/WA1Q/WD1Q look like flopped copies -- confirm.
+      for (int index = 0; index < Width; index = index + 1) begin
+        if (BitWEN1[index]) begin
+          memory[WA1Q][index] <= WD1Q[index];
+        end
+      end
     end
   end
 endmodule
diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv
index e9294bac..d1c0bfb7 100644
--- a/wally-pipelined/src/ifu/bpred.sv
+++ b/wally-pipelined/src/ifu/bpred.sv
@@ -30,40 +30,40 @@
 module bpred
   (input logic clk, reset,
-   input logic StallF, StallD, StallE, FlushF, FlushD, FlushE,
+   input logic              StallF, StallD, StallE, FlushF, FlushD, FlushE,
    // Fetch stage
    // the prediction
-   input [`XLEN-1:0] PCNextF, // *** forgot to include this one on the I/O list
-   output [`XLEN-1:0] BPPredPCF,
-   output SelBPPredF,
-   input [31:0] InstrF, // we are going to use the opcode to indicate what type instruction this is.
+   input logic [`XLEN-1:0]  PCNextF, // *** forgot to include this one on the I/O list
+   output logic [`XLEN-1:0] BPPredPCF,
+   output logic             SelBPPredF,
+   input logic [31:0]       InstrF, // we are going to use the opcode to indicate what type instruction this is.
    // if this is too slow we will have to predict the type of instruction.
    // Execute state
    // Update Predictor
-   input [`XLEN-1:0] PCE, // The address of the currently executing instruction
+   input logic [`XLEN-1:0]  PCE, // The address of the currently executing instruction
    // 1 hot encoding
    // return, jump register, jump, branch
    // *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class.
    // *** the specifics of how this is encode is subject to change.
-   input PCSrcE, // AKA Branch Taken
+   input logic              PCSrcE, // AKA Branch Taken
    // Signals required to check the branch prediction accuracy.
-   input [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken.
-   input [`XLEN-1:0] PCD, // The address the branch predictor took.
-   input [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
+   input logic [`XLEN-1:0]  PCTargetE, // The branch destination if the branch is taken.
+   input logic [`XLEN-1:0]  PCD, // The address the branch predictor took.
+   input logic [`XLEN-1:0]  PCLinkE, // The address following the branch instruction. (AKA Fall through address)
    // Report branch prediction status
-   output BPPredWrongE
+   output logic             BPPredWrongE
    );
-  logic BTBValidF;
-  logic [1:0] BPPredF, BPPredD, BPPredE, UpdateBPPredE;
+  logic                     BTBValidF;
+  logic [1:0]               BPPredF, BPPredD, BPPredE, UpdateBPPredE;
-  logic [3:0] InstrClassD, InstrClassF, InstrClassE;
-  logic [`XLEN-1:0] BTBPredPCF, RASPCF;
-  logic TargetWrongE;
-  logic FallThroughWrongE;
-  logic PredictionDirWrongE;
-  logic PredictionPCWrongE;
-
+  logic [3:0]               InstrClassD, InstrClassF, InstrClassE;
+  logic [`XLEN-1:0]         BTBPredPCF, BTBPredPCMemoryF, RASPCF;
+  logic                     TargetWrongE;
+  logic                     FallThroughWrongE;
+  logic                     PredictionDirWrongE;
+  logic                     PredictionPCWrongE;
+  logic [`XLEN-1:0]         CorrectPCE;
   // Part 1 decode the instruction class.
   // *** for now I'm skiping the compressed instructions
@@ -77,7 +77,8 @@ module bpred
   // Part 2 branch direction prediction
-  twoBitPredictor predictor(.LookUpPC(PCNextF),
+  twoBitPredictor predictor(.clk(clk),
+                            .LookUpPC(PCNextF),
                             .Prediction(BPPredF),
                             // update
                             .UpdatePC(PCE),
@@ -89,29 +90,37 @@ module bpred
   // 2) Any information which is necessary for the predictor to built it's next state.
   // For a 2 bit table this is the prediction count.
-  assign SelBPPredF = ((InstrClassF[0] & BPPredF[1]) |
+  assign SelBPPredF = ((InstrClassF[0] & BPPredF[1] & BTBValidF) |
                        InstrClassF[3] |
                        (InstrClassF[2] & BTBValidF) |
-                       InstrClassF[1]) ;
+                       (InstrClassF[1] & BTBValidF)) ;
   // Part 3 Branch target address prediction
   // *** For now the BTB will house the direct and indirect targets
-  BTBPredictor targetPredictor(.LookUpPC(PCNextF),
-                               .TargetPC(BTBPredPCF),
+  BTBPredictor targetPredictor(.clk(clk),
+                               .reset(reset),
+                               .LookUpPC(PCNextF),
+                               .TargetPC(BTBPredPCMemoryF),
                                .Valid(BTBValidF),
                                // update
                                .UpdateEN(InstrClassE[2] | InstrClassE[1] | InstrClassE[0]),
                                .UpdatePC(PCE),
                                .UpdateTarget(PCTargetE));
+  // need to forward when updating to the same address as reading (PCE is the BTB update PC, PCNextF its look up PC).
+  assign CorrectPCE = PCSrcE ? PCTargetE : PCLinkE;
+  assign BTBPredPCF = (PCE == PCNextF) ? CorrectPCE : BTBPredPCMemoryF; // NOTE(review): not gated by the BTB UpdateEN -- confirm
   // Part 4 RAS
-
-  RASPredictor RASPredictor(.pop(InstrClassF[3]),
+  // *** need to add the logic to restore RAS on flushes. We will use incr for this.
+  RASPredictor RASPredictor(.clk(clk),
+                            .reset(reset),
+                            .pop(InstrClassF[3]),
                             .popPC(RASPCF),
                             .push(InstrClassE[3]),
+                            .incr(1'b0),
                             .pushPC(PCLinkE));
   assign BPPredPCF = InstrClassF[3] ? RASPCF : BTBPredPCF;
@@ -126,14 +135,14 @@ module bpred
                            .en(~StallF),
                            .clear(FlushF),
                            .d(BPPredF),
-                           .Q(BPPredD));
+                           .q(BPPredD));
   flopenrc #(2) BPPredRegE(.clk(clk),
                            .reset(reset),
                            .en(~StallD),
                            .clear(FlushD),
                            .d(BPPredD),
-                           .Q(BPPredE));
+                           .q(BPPredE));
   // pipeline the class
   flopenrc #(4) InstrClassRegD(.clk(clk),
diff --git a/wally-pipelined/src/ifu/satCounter2.sv b/wally-pipelined/src/ifu/satCounter2.sv
index 91e47b04..33a842dc 100644
--- a/wally-pipelined/src/ifu/satCounter2.sv
+++ b/wally-pipelined/src/ifu/satCounter2.sv
@@ -29,7 +29,6 @@
 module satCounter2
   (input logic BrDir,
-   input logic Decr,
    input logic [1:0] OldState,
    output logic [1:0] NewState
    );
diff --git a/wally-pipelined/src/ifu/twoBitPredictor.sv b/wally-pipelined/src/ifu/twoBitPredictor.sv
index 6aa8f0b1..703312f5 100644
--- a/wally-pipelined/src/ifu/twoBitPredictor.sv
+++ b/wally-pipelined/src/ifu/twoBitPredictor.sv
@@ -30,22 +30,33 @@
 module twoBitPredictor
   #(parameter int Depth = 10
     )
-  (input clk,
-   input [`XLEN-1:0] LookUpPC,
-   output [1:0] Prediction,
+  (input logic             clk,
+   input logic [`XLEN-1:0] LookUpPC,
+   output logic [1:0]      Prediction,
    // update
-   input [`XLEN-1:0] UpdatePC,
-   input UpdateEN,
-   input [1:0] UpdatePrediction
+   input logic [`XLEN-1:0] UpdatePC,
+   input logic             UpdateEN,
+   input logic [1:0]       UpdatePrediction
    );
+  logic [Depth-1:0]        LookUpPCIndex, UpdatePCIndex;
+
+  // hashing function for indexing the PC
+  // We have Depth bits to index, but XLEN bits as the input.
+  // bit 0 is always 0, bit 1 is 0 if using 4 byte instructions, but is not always 0 if
+  // using compressed instructions. XOR bit 1 with the MSB of index.
+  assign UpdatePCIndex = {UpdatePC[Depth+1] ^ UpdatePC[1], UpdatePC[Depth:2]};
+  assign LookUpPCIndex = {LookUpPC[Depth+1] ^ LookUpPC[1], LookUpPC[Depth:2]};
+
+  logic [1:0]              PredictionMemory; // SRAM read data; declared so it is not an implicit 1-bit net
   SRAM2P1R1W #(Depth, 2) memory(.clk(clk),
-                                .RA1(LookUpPC),
+                                .RA1(LookUpPCIndex),
                                 .RD1(PredictionMemory),
                                 .REN1(1'b1),
-                                .WA1(UpdatePC),
+                                .WA1(UpdatePCIndex),
                                 .WD1(UpdatePrediction),
-                                .WEN1(UpdateEN));
+                                .WEN1(UpdateEN),
+                                .BitWEN1(2'b11));
   // need to forward when updating to the same address as reading.
   assign Prediction = (UpdatePC == LookUpPC) ? UpdatePrediction : PredictionMemory;