csr cleanup

This commit is contained in:
David Harris 2023-01-13 20:55:21 -08:00
commit 7358402bc0
14 changed files with 1097 additions and 73 deletions

View File

@ -0,0 +1,45 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings.
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063
vopt +acc work.testbench -G DEBUG=1 -o workopt
vsim workopt +nowarn3829 -fatal 7
view wave
#-- display input and output signals as hexidecimal values
add log -recursive /*
do wave.do
run -all
noview ../testbench/testbench_imperas.sv
view wave

View File

@ -37,13 +37,13 @@ module BTBPredictor
input logic StallF, StallE, input logic StallF, StallE,
input logic [`XLEN-1:0] LookUpPC, input logic [`XLEN-1:0] LookUpPC,
output logic [`XLEN-1:0] TargetPC, output logic [`XLEN-1:0] TargetPC,
output logic [4:0] InstrClass, output logic [3:0] InstrClass,
output logic Valid, output logic Valid,
// update // update
input logic UpdateEN, input logic UpdateEN,
input logic [`XLEN-1:0] UpdatePC, input logic [`XLEN-1:0] UpdatePC,
input logic [`XLEN-1:0] UpdateTarget, input logic [`XLEN-1:0] UpdateTarget,
input logic [4:0] UpdateInstrClass, input logic [3:0] UpdateInstrClass,
input logic UpdateInvalid input logic UpdateInvalid
); );
@ -99,7 +99,7 @@ module BTBPredictor
// *** need to add forwarding. // *** need to add forwarding.
// *** optimize for byte write enables // *** optimize for byte write enables
ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk), ram2p1r1wb #(Depth, `XLEN+4) memory(.clk(clk),
.reset(reset), .reset(reset),
.ra1(LookUpPCIndex), .ra1(LookUpPCIndex),
.rd1({{InstrClass, TargetPC}}), .rd1({{InstrClass, TargetPC}}),
@ -107,7 +107,7 @@ module BTBPredictor
.wa2(UpdatePCIndex), .wa2(UpdatePCIndex),
.wd2({UpdateInstrClass, UpdateTarget}), .wd2({UpdateInstrClass, UpdateTarget}),
.wen2(UpdateEN), .wen2(UpdateEN),
.bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right. .bwe2({4'hF, {`XLEN{1'b1}}})); // *** definitely not right.
endmodule endmodule

View File

@ -32,14 +32,19 @@ module RASPredictor
#(parameter int StackSize = 16 #(parameter int StackSize = 16
) )
(input logic clk, (input logic clk,
input logic reset, input logic reset,
input logic pop, input logic PopF,
output logic [`XLEN-1:0] popPC, output logic [`XLEN-1:0] RASPCF,
input logic push, input logic [3:0] WrongPredInstrClassD,
input logic incr, input logic [3:0] InstrClassD,
input logic [`XLEN-1:0] pushPC input logic PushE,
input logic incr,
input logic [`XLEN-1:0] PCLinkE
); );
// *** need to update so it either doesn't push until the memory stage
// or need to repair flushed push.
// *** need to repair popped and then flushed returns.
logic CounterEn; logic CounterEn;
localparam Depth = $clog2(StackSize); localparam Depth = $clog2(StackSize);
@ -47,13 +52,13 @@ module RASPredictor
logic [StackSize-1:0] [`XLEN-1:0] memory; logic [StackSize-1:0] [`XLEN-1:0] memory;
integer index; integer index;
assign CounterEn = pop | push | incr; assign CounterEn = PopF | PushE | incr | WrongPredInstrClassD[2];
assign PtrD = pop ? PtrM1 : PtrP1; assign PtrD = PopF | InstrClassD[2] ? PtrM1 : PtrP1;
assign PtrM1 = PtrQ - 1'b1; assign PtrM1 = PtrQ - 1'b1;
assign PtrP1 = PtrQ + 1'b1; assign PtrP1 = PtrQ + 1'b1;
// may have to handle a push and an incr at the same time. // may have to handle a PushE and an incr at the same time.
// *** what happens if jal is executing and there is a return being flushed in Decode? // *** what happens if jal is executing and there is a return being flushed in Decode?
flopenr #(Depth) PTR(.clk(clk), flopenr #(Depth) PTR(.clk(clk),
@ -67,12 +72,12 @@ module RASPredictor
if(reset) begin if(reset) begin
for(index=0; index<StackSize; index++) for(index=0; index<StackSize; index++)
memory[index] <= {`XLEN{1'b0}}; memory[index] <= {`XLEN{1'b0}};
end else if(push) begin end else if(PushE) begin
memory[PtrP1] <= #1 pushPC; memory[PtrP1] <= #1 PCLinkE;
end end
end end
assign popPC = memory[PtrQ]; assign RASPCF = memory[PtrQ];
endmodule endmodule

View File

@ -52,7 +52,7 @@ module bpred (
input logic PCSrcE, // Executation stage branch is taken input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
output logic [4:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
// Report branch prediction status // Report branch prediction status
output logic BPPredWrongE, // Prediction is wrong. output logic BPPredWrongE, // Prediction is wrong.
@ -65,13 +65,13 @@ module bpred (
logic BTBValidF; logic BTBValidF;
logic [1:0] DirPredictionF; logic [1:0] DirPredictionF;
logic [4:0] BPInstrClassF, BPInstrClassD, BPInstrClassE; logic [3:0] PredInstrClassF, PredInstrClassD, PredInstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF; logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE; logic TargetWrongE;
logic FallThroughWrongE; logic FallThroughWrongE;
logic PredictionPCWrongE; logic PredictionPCWrongE;
logic PredictionInstrClassWrongE; logic PredictionInstrClassWrongE;
logic [4:0] InstrClassD, InstrClassE, InstrClassW; logic [3:0] InstrClassD, InstrClassE, InstrClassW;
logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE; logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
logic SelBPPredF; logic SelBPPredF;
@ -79,7 +79,8 @@ module bpred (
logic BPPredWrongM; logic BPPredWrongM;
logic [`XLEN-1:0] PCNext0F; logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE; logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
// Part 1 branch direction prediction // Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong. // look into the 2 port Sram model. something is wrong.
if (`BPTYPE == "BPTWOBIT") begin:Predictor if (`BPTYPE == "BPTWOBIT") begin:Predictor
@ -95,7 +96,7 @@ module bpred (
end else if (`BPTYPE == "BPSPECULATIVEGLOBAL") begin:Predictor end else if (`BPTYPE == "BPSPECULATIVEGLOBAL") begin:Predictor
speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .PCSrcE); .BranchInstrW(InstrClassW[0]), .PCSrcE);
end else if (`BPTYPE == "BPGSHARE") begin:Predictor end else if (`BPTYPE == "BPGSHARE") begin:Predictor
@ -106,8 +107,8 @@ module bpred (
end else if (`BPTYPE == "BPSPECULATIVEGSHARE") begin:Predictor end else if (`BPTYPE == "BPSPECULATIVEGSHARE") begin:Predictor
speculativegshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW, speculativegshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE, .PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]), .BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .PCSrcE); .BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE);
end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
// *** Fix me // *** Fix me
@ -129,10 +130,9 @@ module bpred (
// 1) A direction (1 = Taken, 0 = Not Taken) // 1) A direction (1 = Taken, 0 = Not Taken)
// 2) Any information which is necessary for the predictor to build its next state. // 2) Any information which is necessary for the predictor to build its next state.
// For a 2 bit table this is the prediction count. // For a 2 bit table this is the prediction count.
assign SelBPPredF = ((BPInstrClassF[0] & DirPredictionF[1] & BTBValidF) | assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & BTBValidF) |
BPInstrClassF[3] | PredInstrClassF[2] |
(BPInstrClassF[2] & BTBValidF) | (PredInstrClassF[1] & BTBValidF) ;
BPInstrClassF[1] & BTBValidF) ;
// Part 2 Branch target address prediction // Part 2 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets // *** For now the BTB will house the direct and indirect targets
@ -143,7 +143,7 @@ module bpred (
.*, // Stalls and flushes .*, // Stalls and flushes
.LookUpPC(PCNextF), .LookUpPC(PCNextF),
.TargetPC(BTBPredPCF), .TargetPC(BTBPredPCF),
.InstrClass(BPInstrClassF), .InstrClass(PredInstrClassF),
.Valid(BTBValidF), .Valid(BTBValidF),
// update // update
.UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE), .UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE),
@ -154,26 +154,28 @@ module bpred (
// Part 3 RAS // Part 3 RAS
// *** need to add the logic to restore RAS on flushes. We will use incr for this. // *** need to add the logic to restore RAS on flushes. We will use incr for this.
// *** needs to include flushX
RASPredictor RASPredictor(.clk(clk), RASPredictor RASPredictor(.clk(clk),
.reset(reset), .reset(reset),
.pop(BPInstrClassF[3] & ~StallF), .PopF(PredInstrClassF[2] & ~StallF),
.popPC(RASPCF), .WrongPredInstrClassD,
.push(InstrClassE[4] & ~StallE), .InstrClassD,
.RASPCF,
.PushE(InstrClassE[3] & ~StallE),
.incr(1'b0), .incr(1'b0),
.pushPC(PCLinkE)); .PCLinkE);
assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF; assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTBPredPCF;
// the branch predictor needs a compact decoding of the instruction class. // the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns. assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5 assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5 assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return (InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
flopenrc #(5) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE); flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE);
flopenrc #(5) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM); flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM);
flopenrc #(5) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW); flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW);
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM); flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// branch predictor // branch predictor
@ -182,8 +184,8 @@ module bpred (
{DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM}); {DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM});
// pipeline the class // pipeline the class
flopenrc #(5) BPInstrClassRegD(clk, reset, FlushD, ~StallD, BPInstrClassF, BPInstrClassD); flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD);
flopenrc #(5) BPInstrClassRegE(clk, reset, FlushE, ~StallE, BPInstrClassD, BPInstrClassE); flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE);
// Check the prediction // Check the prediction
// first check if the target or fallthrough address matches what was predicted. // first check if the target or fallthrough address matches what was predicted.
@ -201,7 +203,7 @@ module bpred (
// Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated. // Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated.
// Also we want to track this in a performance counter. // Also we want to track this in a performance counter.
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE; assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE;
// We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about // We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about
// the direction or class, but correct about the target we don't have the flush the pipeline. However we still // the direction or class, but correct about the target we don't have the flush the pipeline. However we still
@ -209,11 +211,14 @@ module bpred (
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE; assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
// If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter. // If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE; assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1]) & PredictionPCWrongE;
// similar with RAS // similar with RAS
assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE; assign RASPredPCWrongE = InstrClassE[2] & PredictionPCWrongE;
// Finally if the real instruction class is non CFI but the predictor said it was we need to count. // Finally if the real instruction class is non CFI but the predictor said it was we need to count.
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE; assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
// branch class prediction wrong.
assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD;
// Selects the BP or PC+2/4. // Selects the BP or PC+2/4.

View File

@ -56,7 +56,7 @@ module ifu (
output logic [31:0] InstrD, InstrM, output logic [31:0] InstrD, InstrM,
output logic [`XLEN-1:0] PCM, output logic [`XLEN-1:0] PCM,
// branch predictor // branch predictor
output logic [4:0] InstrClassM, output logic [3:0] InstrClassM,
output logic DirPredictionWrongM, output logic DirPredictionWrongM,
output logic BTBPredPCWrongM, output logic BTBPredPCWrongM,
output logic RASPredPCWrongM, output logic RASPredPCWrongM,

View File

@ -0,0 +1,225 @@
///////////////////////////////////////////
// gsharePredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Global History Branch predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module optgshare
#(parameter int k = 10
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
// input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic PCSrcE
);
logic MatchF, MatchD, MatchE, MatchM, MatchW;
logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW;
logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
logic [k-1:0] GHRNextF;
logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW;
logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE, IndexM, IndexW;
logic PCSrcM, PCSrcW;
logic [`XLEN-1:0] PCW;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
logic [k+4:0] GHRNext, GHR;
logic GHRUpdateEn;
assign GHRUpdateEn = BranchInstrF | (DirPredictionWrongE & BranchInstrE) |
FlushD | FlushE | FlushM | FlushW;
// it doesn't work this way. Instead we need to see how many branch instructions are flushed.
// then shift over by that amount.
logic RemoveBrW, RemoveBrM, RemoveBrE, RemoveBrD, RemoveBrF, RemoveBrNextF;
assign RemoveBrW = '0;
assign RemoveBrM = BranchInstrM & FlushW;
assign RemoveBrE = BranchInstrE & FlushM;
assign RemoveBrD = BranchInstrD & FlushE;
assign RemoveBrF = BranchInstrF & FlushD;
assign RemoveBrNextF = BranchInstrF & FlushD;
always_comb begin
casez ({BranchInstrF, DirPredictionWrongE, RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM})
6'b00_0000: GHRNext = GHR; // no change
6'b00_0001: GHRNext = {GHR[k+4:k+1], GHR[k-1:0], 1'b0}; // RemoveBrM
6'b0?_0010: GHRNext = {GHR[k+4:k+2], GHR[k:0], 1'b0}; // RemoveBrE
6'b0?_0011: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrE, RemoveBrM
6'b00_0100: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrD
6'b00_0101: GHRNext = {GHR[k+4:k+3], GHR[k+1:0], 1'b0}; // RemoveBrD, RemoveBrM
6'b0?_0110: GHRNext = {GHR[k+4:k+3], GHR[k+1], GHR[k-1:0], 2'b0}; // RemoveBrD, RemoveBrE
6'b0?_0111: GHRNext = {GHR[k+4:k+3], GHR[k-1:0], 3'b0}; // RemoveBrD, RemoveBrE, RemoveBrM
6'b?0_1000: GHRNext = {GHR[k+2:0], 2'b0}; // RemoveBrF,
6'b?0_1001: GHRNext = {GHR[k+2:k+1], GHR[k-1:0], 3'b0}; // RemoveBrF, RemoveBrM
6'b??_1010: GHRNext = {GHR[k+2], GHR[k:0], 3'b0}; // RemoveBrF, RemoveBrE
6'b??_1011: GHRNext = {GHR[k+2], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrE, RemoveBrM
6'b?0_1100: GHRNext = {GHR[k+1:0], 3'b0}; // RemoveBrF, RemoveBrD
6'b?0_1101: GHRNext = {GHR[k+1], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrM
6'b??_1110: GHRNext = {GHR[k:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrE
6'b??_1111: GHRNext = {GHR[k-1:0], 5'b0}; // RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM
6'b?1_0000: GHRNext = {PCSrcE, GHR[k+3:0]}; // Miss prediction, no branches to flushes
6'b?1_0001: GHRNext = {PCSrcE, GHR[k+3:k], GHR[k-1:1], 1'b0}; // Miss prediction, branch in Memory stage dropped
6'b?1_1100: GHRNext = {PCSrcE, GHR[k+1:0], 2'b00}; // Miss prediction, cannot have RemoveBrE
6'b?1_1101: GHRNext = {PCSrcE, GHR[k+1], GHR[k-1:0], 3'b0}; // Miss prediction, cannot have RemoveBrE
6'b10_0000: GHRNext = {DirPredictionF[1], GHR[k+4:1]};
6'b10_0001: GHRNext = {DirPredictionF[1], GHR[k+4:k+1], GHR[k-1:1], 1'b0};
6'b10_0010: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k:1], 1'b0};
6'b10_0011: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k-1:1], 2'b0};
6'b10_0100: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1:1], 1'b0};
6'b10_0101: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1], GHR[k-1:1], 2'b0};
6'b10_0110: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k], GHR[k-1:1], 2'b0};
6'b10_0111: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k-1:1], 3'b0};
default: GHRNext = GHR;
endcase
end
flopenr #(k+5) GHRReg(clk, reset, GHRUpdateEn, GHRNext, GHR);
logic [k-1:0] GHRNextF_temp, GHRF_temp;
logic [k:0] GHRD_temp, GHRE_temp, GHRM_temp, GHRW_temp;
logic GHRFExtra_temp;
// these are also in the ieu controller. should create inputs.
logic InstrValidF, InstrValidD, InstrValidE, InstrValidM, InstrValidW;
flopenrc #(1) InstrValidFReg(clk, reset, FlushD, ~StallF, 1'b1, InstrValidF);
flopenrc #(1) InstrValidDReg(clk, reset, FlushD, ~StallD, InstrValidF, InstrValidD);
flopenrc #(1) InstrValidEReg(clk, reset, FlushE, ~StallE, InstrValidD, InstrValidE);
flopenrc #(1) InstrValidMReg(clk, reset, FlushM, ~StallM, InstrValidE, InstrValidM);
flopenrc #(1) InstrValidWReg(clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
assign GHRNextF_temp = GHRNext[k+4:5];
assign GHRF_temp = InstrValidF ? GHR[k+3:4] : GHRNextF_temp;
assign GHRFExtra_temp = InstrValidF ? 1'b0 : GHR[k+4];
assign GHRD_temp = InstrValidD ? GHR[k+3:3] : {GHRFExtra_temp, GHRF_temp};
assign GHRE_temp = InstrValidE ? GHR[k+2:2] : GHRD_temp;
assign GHRM_temp = InstrValidM ? GHR[k+1:1] : GHRE_temp;
assign GHRW_temp = InstrValidW ? GHR[k:0] : GHRM_temp;
assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]};
assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]};
ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallW & ~FlushW),
.ra1(IndexNextF),
.rd1(TableDirPredictionF),
.wa2(IndexW),
.wd2(NewDirPredictionW),
.we2(BranchInstrW & ~StallW & ~FlushW),
.bwe2(1'b1));
// if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage
// and then register for use in the Fetch stage.
assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF);
assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD);
assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE);
assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM);
assign MatchW = BranchInstrW & (IndexNextF == IndexW);
assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF :
MatchD ? NewDirPredictionD :
MatchE ? NewDirPredictionE :
MatchM ? NewDirPredictionM :
NewDirPredictionW;
flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF);
assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF;
// DirPrediction pipeline
flopenr #(2) PredictionRegD(clk, reset, ~StallD, DirPredictionF, DirPredictionD);
flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE);
// New prediction pipeline
satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF));
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW);
// PCSrc pipeline
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
flopenrc #(1) PCSrcWReg(clk, reset, FlushW, ~StallW, PCSrcM, PCSrcW);
// GHR pipeline
assign GHRNextF = FlushD ? GHRNextD[k:1] :
BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} :
GHRF;
flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF);
assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF};
flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD);
assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);
assign GHRE = BranchInstrE ? {PCSrcE, OldGHRE[k-1:0]} : OldGHRE;
assign GHRNextM = FlushM ? GHRNextW : GHRE;
flopenr #(k+1) GHRMReg(clk, reset, (~StallM) | FlushM, GHRNextM, GHRM);
assign GHRNextW = FlushW ? GHRW : GHRM;
flopenr #(k+1) GHRWReg(clk, reset, (BranchInstrM & ~StallW) | FlushW, GHRNextW, GHRW);
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule

View File

@ -40,22 +40,24 @@ module speculativegshare
output logic DirPredictionWrongE, output logic DirPredictionWrongE,
// update // update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW, input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE input logic PCSrcE
); );
logic MatchF, MatchD, MatchE, MatchM, MatchW; logic MatchF, MatchD, MatchE, MatchM;
logic MatchNextX, MatchXF; logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE; logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW; logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM;
logic [k-1:0] GHRF; logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW; logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
logic [k-1:0] GHRNextF; logic [k-1:0] GHRNextF;
logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW; logic [k:-1] GHRNextD, OldGHRD;
logic [k:0] GHRNextE, GHRNextM, GHRNextW;
logic [k-1:0] IndexNextF, IndexF; logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE, IndexM, IndexW; logic [k-1:0] IndexD, IndexE, IndexM;
logic PCSrcM, PCSrcW; logic PCSrcM, PCSrcW;
logic [`XLEN-1:0] PCW; logic [`XLEN-1:0] PCW;
@ -67,34 +69,31 @@ module speculativegshare
assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]}; assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]}; assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]}; assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]};
ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk), ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallW & ~FlushW), .ce1(~StallF | reset), .ce2(~StallW & ~FlushW),
.ra1(IndexNextF), .ra1(IndexNextF),
.rd1(TableDirPredictionF), .rd1(TableDirPredictionF),
.wa2(IndexW), .wa2(IndexM),
.wd2(NewDirPredictionW), .wd2(NewDirPredictionM),
.we2(BranchInstrW & ~StallW & ~FlushW), .we2(BranchInstrM & ~StallW & ~FlushW),
.bwe2(1'b1)); .bwe2(1'b1));
// if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage // if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage
// and then register for use in the Fetch stage. // and then register for use in the Fetch stage.
assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF); assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF);
assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD); assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD);
assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE); assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE);
assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM); assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM);
assign MatchW = BranchInstrW & (IndexNextF == IndexW); assign MatchNextX = MatchF | MatchD | MatchE | MatchM;
assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF); flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF : assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF :
MatchD ? NewDirPredictionD : MatchD ? NewDirPredictionD :
MatchE ? NewDirPredictionE : MatchE ? NewDirPredictionE :
MatchM ? NewDirPredictionM : NewDirPredictionM;
NewDirPredictionW;
flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF); flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF);
assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF; assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF;
@ -104,11 +103,11 @@ module speculativegshare
flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE); flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE);
// New prediction pipeline // New prediction pipeline
satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF)); assign NewDirPredictionF = {DirPredictionF[1], DirPredictionF[1]};
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD); flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE)); satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM); flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW);
// PCSrc pipeline // PCSrc pipeline
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
@ -121,8 +120,11 @@ module speculativegshare
flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF); flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF);
assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF}; assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]};
flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD); flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD);
assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-2:-1] : // shift left
OldGHRD;
assign GHRNextE = FlushE ? GHRNextM : GHRD; assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE); flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);

View File

@ -61,7 +61,7 @@ module csr #(parameter
input logic BTBPredPCWrongM, input logic BTBPredPCWrongM,
input logic RASPredPCWrongM, input logic RASPredPCWrongM,
input logic PredictionInstrClassWrongM, input logic PredictionInstrClassWrongM,
input logic [4:0] InstrClassM, input logic [3:0] InstrClassM,
input logic DCacheMiss, input logic DCacheMiss,
input logic DCacheAccess, input logic DCacheAccess,
input logic ICacheMiss, input logic ICacheMiss,

View File

@ -47,7 +47,7 @@ module csrc #(parameter
input logic BTBPredPCWrongM, input logic BTBPredPCWrongM,
input logic RASPredPCWrongM, input logic RASPredPCWrongM,
input logic PredictionInstrClassWrongM, input logic PredictionInstrClassWrongM,
input logic [4:0] InstrClassM, input logic [3:0] InstrClassM,
input logic DCacheMiss, input logic DCacheMiss,
input logic DCacheAccess, input logic DCacheAccess,
input logic ICacheMiss, input logic ICacheMiss,
@ -87,9 +87,9 @@ module csrc #(parameter
assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM; assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM;
assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM; assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM;
assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM; assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM;
assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM; assign CounterEvent[7] = (InstrClassM[3] | InstrClassM[1]) & InstrValidNotFlushedM;
assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM; assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM;
assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM; assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM;
assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM; assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM;
assign CounterEvent[11] = DCacheAccess; assign CounterEvent[11] = DCacheAccess;
assign CounterEvent[12] = DCacheMiss; assign CounterEvent[12] = DCacheMiss;

View File

@ -50,7 +50,7 @@ module privileged (
input logic BTBPredPCWrongM, // branch predictor guessed wrong target input logic BTBPredPCWrongM, // branch predictor guessed wrong target
input logic RASPredPCWrongM, // return adddress stack guessed wrong target input logic RASPredPCWrongM, // return adddress stack guessed wrong target
input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class
input logic [4:0] InstrClassM, // actual instruction class input logic [3:0] InstrClassM, // actual instruction class
input logic DCacheMiss, // data cache miss input logic DCacheMiss, // data cache miss
input logic DCacheAccess, // data cache accessed (hit or miss) input logic DCacheAccess, // data cache accessed (hit or miss)
input logic ICacheMiss, // instruction cache miss input logic ICacheMiss, // instruction cache miss

View File

@ -146,7 +146,7 @@ module wallypipelinedcore (
logic BTBPredPCWrongM; logic BTBPredPCWrongM;
logic RASPredPCWrongM; logic RASPredPCWrongM;
logic PredictionInstrClassWrongM; logic PredictionInstrClassWrongM;
logic [4:0] InstrClassM; logic [3:0] InstrClassM;
logic InstrAccessFaultF, HPTWInstrAccessFaultM; logic InstrAccessFaultF, HPTWInstrAccessFaultM;
logic [2:0] LSUHSIZE; logic [2:0] LSUHSIZE;
logic [2:0] LSUHBURST; logic [2:0] LSUHBURST;

View File

@ -0,0 +1,251 @@
`include "wally-config.vh"
`define NUM_REGS 32
`define NUM_CSRS 4096
`define PRINT_PC_INSTR 1
`define PRINT_MOST 1
`define PRINT_ALL 0
module rvviTrace #(
parameter int ILEN = `XLEN, // Instruction length in bits
parameter int XLEN = `XLEN, // GPR length in bits
parameter int FLEN = `FLEN, // FPR length in bits
parameter int VLEN = 0, // Vector register size in bits
parameter int NHART = 1, // Number of harts reported
parameter int RETIRE = 1) // Number of instructions that can retire during valid event
();
localparam NUMREGS = `E_SUPPORTED ? 16 : 32;
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushD, FlushE, FlushM, FlushW;
logic TrapM, TrapW;
logic IntrF, IntrD, IntrE, IntrM, IntrW;
logic HaltM, HaltW;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] rf[NUMREGS];
logic [NUMREGS-1:0] rf_wb;
logic [4:0] rf_a3;
logic rf_we3;
logic [`XLEN-1:0] frf[32];
logic [`NUM_REGS-1:0] frf_wb;
logic [4:0] frf_a4;
logic frf_we4;
logic [`XLEN-1:0] CSRArray [logic[11:0]];
logic CSRWriteM, CSRWriteW;
logic [11:0] CSRAdrM, CSRAdrW;
// tracer signals
logic clk;
logic valid;
logic [63:0] order [(NHART-1):0][(RETIRE-1):0];
logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0];
logic intr [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0];
logic trap [(NHART-1):0][(RETIRE-1):0];
logic halt [(NHART-1):0][(RETIRE-1):0];
logic [1:0] mode [(NHART-1):0][(RETIRE-1):0];
logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0];
logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0];
logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0];
logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0];
assign clk = testbench.dut.clk;
// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet
assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD;
assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCNextF = testbench.dut.core.ifu.PCNextF;
assign PCF = testbench.dut.core.ifu.PCF;
assign PCD = testbench.dut.core.ifu.PCD;
assign PCE = testbench.dut.core.ifu.PCE;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushD = testbench.dut.core.FlushD;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
assign TrapM = testbench.dut.core.TrapM;
assign HaltM = testbench.DCacheFlushStart;
assign PrivilegeModeW = testbench.dut.core.priv.priv.privmode.PrivilegeModeW;
assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL;
assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL;
always_comb begin
// machine CSRs
// *** missing PMP and performance counters.
CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW;
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW;
CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW;
CSRArray[12'h341] = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW;
CSRArray[12'h306] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW;
CSRArray[12'h320] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW;
CSRArray[12'h302] = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW;
CSRArray[12'h303] = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
CSRArray[12'h344] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW;
CSRArray[12'h304] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW;
CSRArray[12'h301] = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW;
CSRArray[12'hF14] = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW;
CSRArray[12'h340] = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW;
CSRArray[12'h342] = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW;
CSRArray[12'h343] = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW;
CSRArray[12'hF11] = 0;
CSRArray[12'hF12] = 0;
CSRArray[12'hF13] = `XLEN'h100;
CSRArray[12'hF15] = 0;
CSRArray[12'h34A] = 0;
// MCYCLE and MINSTRET
CSRArray[12'hB00] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[0];
CSRArray[12'hB02] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[2];
// supervisor CSRs
CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW;
CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222;
CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW;
CSRArray[12'h141] = testbench.dut.core.priv.priv.csr.csrs.SEPC_REGW;
CSRArray[12'h106] = testbench.dut.core.priv.priv.csr.csrs.SCOUNTEREN_REGW;
CSRArray[12'h180] = testbench.dut.core.priv.priv.csr.csrs.SATP_REGW;
CSRArray[12'h140] = testbench.dut.core.priv.priv.csr.csrs.csrs.SSCRATCH_REGW;
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
// user CSRs
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.FRM_REGW;
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
end
genvar index;
assign rf[0] = '0;
for(index = 1; index < NUMREGS; index += 1)
assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index];
assign rf_a3 = testbench.dut.core.ieu.dp.regf.a3;
assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3;
always_comb begin
rf_wb <= '0;
if(rf_we3)
rf_wb[rf_a3] <= 1'b1;
end
for(index = 0; index < NUMREGS; index += 1)
assign frf[index] = testbench.dut.core.fpu.fpu.fregfile.rf[index];
assign frf_a4 = testbench.dut.core.fpu.fpu.fregfile.a4;
assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4;
always_comb begin
frf_wb <= '0;
if(frf_we4)
frf_wb[frf_a4] <= 1'b1;
end
assign CSRAdrM = testbench.dut.core.priv.priv.csr.CSRAdrM;
assign CSRWriteM = testbench.dut.core.priv.priv.csr.CSRWriteM;
// pipeline to writeback stage
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW);
flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW);
flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF);
flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD);
flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE);
flopenrc #(1) IntrMReg (clk, reset, FlushM, ~StallM, IntrE, IntrM);
flopenrc #(1) IntrWReg (clk, reset, FlushW, ~StallW, IntrM, IntrW);
flopenrc #(12) CSRAdrWReg (clk, reset, FlushW, ~StallW, CSRAdrM, CSRAdrW);
flopenrc #(1) CSRWriteWReg (clk, reset, FlushW, ~StallW, CSRWriteM, CSRWriteW);
// Initially connecting the writeback stage signals, but may need to use M stage
// and gate on ~FlushW.
assign valid = InstrValidW & ~StallW & ~FlushW;
assign order[0][0] = CSRArray[12'hB02];
assign insn[0][0] = InstrRawW;
assign pc_rdata[0][0] = PCW;
assign trap[0][0] = TrapW;
assign halt[0][0] = HaltW;
assign intr[0][0] = IntrW;
assign mode[0][0] = PrivilegeModeW;
assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 :
PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL;
assign pc_wdata[0][0] = ~FlushW ? PCM :
~FlushM ? PCE :
~FlushE ? PCD :
~FlushD ? PCF : PCNextF;
for(index = 0; index < `NUM_REGS; index += 1) begin
assign x_wdata[0][0][index] = rf[index];
assign x_wb[0][0][index] = rf_wb[index];
assign f_wdata[0][0][index] = frf[index];
assign f_wb[0][0][index] = frf_wb[index];
end
always_comb begin
csr_wb[0][0] <= '0;
if(CSRWriteW)
csr_wb[0][0][CSRAdrW] <= 1'b1;
end
integer index3;
always_comb begin
for(index3 = 0; index3 < `NUM_CSRS; index3 += 1) begin
if(CSRArray.exists(index3))
csr[0][0][index3] = CSRArray[index3];
else
csr[0][0][index3] = '0;
end
end
// *** implementation only cancel? so sc does not clear?
assign lrsc_cancel[0][0] = '0;
integer index2;
always_ff @(posedge clk) begin
if(valid) begin
if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST))
$display("order = %08d, PC = %08x, insn = %08x", order[0][0], pc_rdata[0][0], insn[0][0]);
else if(`PRINT_MOST & !`PRINT_ALL)
$display("order = %08d, PC = %010x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %010x, x%02d = %016x, f%02d = %016x, csr%03x = %016x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4], CSRAdrW, csr[0][0][CSRAdrW]);
else if(`PRINT_ALL) begin
$display("order = %08d, PC = %08x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]);
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("x%02d = %08x", index2, x_wdata[0][0][index2]);
end
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("f%02d = %08x", index2, f_wdata[0][0][index2]);
end
end
end
if(HaltW) $stop();
end
endmodule

View File

@ -510,7 +510,6 @@ logic [3:0] dummy;
end end
end end
endmodule endmodule
module riscvassertions; module riscvassertions;
@ -692,3 +691,56 @@ task automatic updateProgramAddrLabelArray;
$fclose(ProgramLabelMapFP); $fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP); $fclose(ProgramAddrMapFP);
endtask endtask
`define NUM_REGS 32
`define NUM_CSRS 4096
module rvviTrace();
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushE, FlushM, FlushW;
// tracer signals
logic clk;
logic valid;
logic [`XLEN-1:0] insn;
logic [`XLEN-1:0 ] pc_rdata;
assign clk = testbench.dut.clk;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
// pipeline to writeback stage
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
assign valid = InstrValidW;
assign insn = InstrRawW;
assign pc_rdata = PCW;
always_ff @(posedge clk) begin
if(valid) begin
$display("PC = %x, insn = %x", pc_rdata, insn);
end
end
endmodule

View File

@ -0,0 +1,439 @@
///////////////////////////////////////////
// testbench.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the riscv-arch-test and Imperas suites
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module testbench;
parameter DEBUG=0;
logic clk;
logic reset_ext, reset;
logic [`XLEN-1:0] testadr, testadrNoBase;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
logic [3:0] dummy;
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [`PA_BITS-1:0] HADDR;
logic [`AHBW-1:0] HWDATA;
logic [`XLEN/8-1:0] HWSTRB;
logic HWRITE;
logic [2:0] HSIZE;
logic [2:0] HBURST;
logic [3:0] HPROT;
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
string ProgramAddrMapFile, ProgramLabelMapFile;
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
logic DCacheFlushDone, DCacheFlushStart;
string testName;
string memfilename, pathname, adrstr;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
logic SDCCLK;
logic SDCCmdIn;
logic SDCCmdOut;
logic SDCCmdOE;
logic [3:0] SDCDatIn;
tri1 [3:0] SDCDat;
tri1 SDCCmd;
logic HREADY;
logic HSELEXT;
logic InitializingMemories;
integer ResetCount, ResetThreshold;
logic InReset;
// Imperas look here.
initial
begin
ResetCount = 0;
ResetThreshold = 2;
InReset = 1;
testadr = 0;
testadrNoBase = 0;
//testName = "rv64i_m/I/src/add-01.S";
testName = "rv64i_m/privilege/src/WALLY-mmu-sv48-01.S";
//pathname = "../../tests/riscof/work/riscv-arch-test/";
pathname = "../../tests/riscof/work/wally-riscv-arch-test/";
memfilename = {pathname, testName, "/ref/ref.elf.memfile"};
if (`BUS) $readmemh(memfilename, dut.uncore.uncore.ram.ram.memory.RAM);
else $error("Imperas test bench requires BUS.");
ProgramAddrMapFile = {pathname, testName, "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, testName, "/ref/ref.elf.objdump.lab"};
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
end
rvviTrace rvviTrace();
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
// check assertions for a legal configuration
riscvassertions riscvassertions();
// instantiate device to be tested
assign GPIOPinsIn = 0;
assign UARTSin = 1;
if(`EXT_MEM_SUPPORTED) begin
ram_ahb #(.BASE(`EXT_MEM_BASE), .RANGE(`EXT_MEM_RANGE))
ram (.HCLK, .HRESETn, .HADDR, .HWRITE, .HTRANS, .HWDATA, .HSELRam(HSELEXT),
.HREADRam(HRDATAEXT), .HREADYRam(HREADYEXT), .HRESPRam(HRESPEXT), .HREADY,
.HWSTRB);
end else begin
assign HREADYEXT = 1;
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
end
if(`FPGA) begin : sdcard
sdModel sdcard
(.sdClk(SDCCLK),
.cmd(SDCCmd),
.dat(SDCDat));
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
assign SDCCmdIn = SDCCmd;
assign SDCDatIn = SDCDat;
end else begin
assign SDCCmd = '0;
assign SDCDat = '0;
end
wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT,
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
dut.core.ifu.FinalInstrRawF[31:0],
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
dut.core.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
// if ($time % 100000 == 0) $display("Time is %0t", $time);
end
// check results
assign reset_ext = InReset;
always @(negedge clk)
begin
InitializingMemories = 0;
if(InReset == 1) begin
// once the test inidicates it's done we need to immediately hold reset for a number of cycles.
if(ResetCount < ResetThreshold) ResetCount = ResetCount + 1;
else begin // hit reset threshold so we remove reset.
InReset = 0;
ResetCount = 0;
end
end
end // always @ (negedge clk)
// track the current function or global label
if (DEBUG == 1) begin : FunctionName
FunctionName FunctionName(.reset(reset),
.clk(clk),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// Termination condition
// terminate on a specific ECALL after li x3,1 for old Imperas tests, *** remove this when old imperas tests are removed
// or sw gp,-56(t0) for new Imperas tests
// or sd gp, -56(t0)
// or on a jump to self infinite loop (6f) for RISC-V Arch tests
logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
else assign ecf = 0;
assign DCacheFlushStart = ecf &
(dut.core.ieu.dp.regf.rf[3] == 1 |
(dut.core.ieu.dp.regf.we3 &
dut.core.ieu.dp.regf.a3 == 3 &
dut.core.ieu.dp.regf.wd3 == 1)) |
((dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM ) |
((dut.core.lsu.IEUAdrM == ProgramAddrLabelArray["tohost"]) & InstrMName == "SW" );
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
.reset(reset),
.start(DCacheFlushStart),
.done(DCacheFlushDone));
// initialize the branch predictor
if (`BPRED_ENABLED == 1)
begin
genvar adrindex;
// Initializing all zeroes into the branch predictor memory.
for(adrindex = 0; adrindex < 1024; adrindex++) begin
initial begin
force dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex] = 0;
force dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex] = 0;
#1;
release dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex];
release dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex];
end
end
end
// check for hange up.
logic [`XLEN-1:0] OldPCW;
integer WatchDogTimerCount;
localparam WatchDogTimerThreshold = 1000000;
logic WatchDogTimeOut;
always_ff @(posedge clk) begin
OldPCW <= PCW;
if(OldPCW == PCW) WatchDogTimerCount = WatchDogTimerCount + 1'b1;
else WatchDogTimerCount = '0;
end
always_comb begin
WatchDogTimeOut = WatchDogTimerCount >= WatchDogTimerThreshold;
if(WatchDogTimeOut) begin
$display("FAILURE: Watch Dog Time Out triggered. PCW stuck at %x for more than %d cycles", PCW, WatchDogTimerCount);
$stop;
end
end
endmodule
module riscvassertions;
initial begin
assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4");
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)");
assert (`D_SUPPORTED | ~`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)");
assert (`F_SUPPORTED | ~`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)");
assert (`DCACHE | ~`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`FLEN<=`XLEN | `DCACHE | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unleses data cache or DTIM is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");
assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF");
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses");
assert (`DCACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`ICACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
assert ((`DCACHE == 0 & `ICACHE == 0) | `BUS) else $error("Dcache and Icache requires DBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
assert (`DCACHE | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally.");
assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED");
end
// *** DH 8/23/
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
module DCacheFlushFSM
(input logic clk,
input logic reset,
input logic start,
output logic done);
genvar adr;
logic [`XLEN-1:0] ShadowRAM[`UNCORE_RAM_BASE>>(1+`XLEN/32):(`UNCORE_RAM_RANGE+`UNCORE_RAM_BASE)>>1+(`XLEN/32)];
if(`DCACHE) begin
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN;
localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN;
localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM;
//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM;
localparam integer numwords = sramlen/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen);
localparam integer lognumways = $clog2(numways);
localparam integer tagstart = lognumlines + loglinebytelen;
genvar index, way, cacheWord;
logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [sramlen-1:0] cacheline;
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen), .sramlen(sramlen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS-1-tagstart:0]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
// these dirty bit selections would be needed if dirty is moved inside the tag array.
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]),
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
integer i, j, k, l;
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(l = 0; l < cachesramwords; l++) begin
if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin
for(k = 0; k < numwords; k++) begin
//cacheline = CacheData[j][i][0];
// does not work with modelsim
// # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions.
// see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions
//ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k];
ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN];
end
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule
module copyShadow
#(parameter tagstart, loglinebytelen, sramlen)
(input logic clk,
input logic start,
input logic [`PA_BITS-1:tagstart] tag,
input logic valid, dirty,
input logic [sramlen-1:0] data,
input logic [32-1:0] index,
input logic [32-1:0] cacheWord,
output logic [sramlen-1:0] CacheData,
output logic [`PA_BITS-1:0] CacheAdr,
output logic [`XLEN-1:0] CacheTag,
output logic CacheValid,
output logic CacheDirty);
always_ff @(posedge clk) begin
if(start) begin
CacheTag = tag;
CacheValid = valid;
CacheDirty = dirty;
CacheData = data;
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(sramlen/8));
end
end
endmodule
task automatic updateProgramAddrLabelArray;
input string ProgramAddrMapFile, ProgramLabelMapFile;
inout integer ProgramAddrLabelArray [string];
// Gets the memory location of begin_signature
integer ProgramLabelMapFP, ProgramAddrMapFP;
ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r");
ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r");
if (ProgramLabelMapFP & ProgramAddrMapFP) begin // check we found both files
while (!$feof(ProgramLabelMapFP)) begin
string label, adrstr;
integer returncode;
returncode = $fscanf(ProgramLabelMapFP, "%s\n", label);
returncode = $fscanf(ProgramAddrMapFP, "%s\n", adrstr);
if (ProgramAddrLabelArray.exists(label))
ProgramAddrLabelArray[label] = adrstr.atohex();
end
end
$fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP);
endtask