csr cleanup

This commit is contained in:
David Harris 2023-01-13 20:55:21 -08:00
commit 7358402bc0
14 changed files with 1097 additions and 73 deletions

View File

@ -0,0 +1,45 @@
# wally-pipelined.do
#
# ModelSim/Questa macro: recreates the "work" library, compiles the sources
# together with ../testbench/testbench_imperas.sv, optimizes work.testbench,
# and runs the simulation to completion with the wave window open.
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
# Only the first macro argument ($1) is referenced below: it selects the
# ../config/$1 include directory. Any further arguments are not used by this script.
# NOTE(review): the file name and usage lines in this header say wally-pipelined.do,
# but the script compiles testbench_imperas.sv -- confirm the header matches the file.
#
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
# NOTE(review): the two invocations above pass no argument, which leaves $1 empty
# in the +incdir path below -- confirm whether a configuration name is required.

# keep running when the simulator hits a break
onbreak {resume}
# create library (delete any existing "work" library first so the build is clean)
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings.
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench_imperas.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063
# optimize the testbench with full signal visibility (+acc) and DEBUG overridden to 1
vopt +acc work.testbench -G DEBUG=1 -o workopt
# NOTE(review): "-fatal 7" presumably promotes message number 7 to a fatal error -- confirm which message that is
vsim workopt +nowarn3829 -fatal 7
view wave
#-- display input and output signals as hexadecimal values
# log every signal in the design hierarchy, then load the wave window setup from wave.do
add log -recursive /*
do wave.do
run -all
# after the run, close the testbench source view and bring the wave window back
noview ../testbench/testbench_imperas.sv
view wave

View File

@ -37,13 +37,13 @@ module BTBPredictor
input logic StallF, StallE,
input logic [`XLEN-1:0] LookUpPC,
output logic [`XLEN-1:0] TargetPC,
output logic [4:0] InstrClass,
output logic [3:0] InstrClass,
output logic Valid,
// update
input logic UpdateEN,
input logic [`XLEN-1:0] UpdatePC,
input logic [`XLEN-1:0] UpdateTarget,
input logic [4:0] UpdateInstrClass,
input logic [3:0] UpdateInstrClass,
input logic UpdateInvalid
);
@ -99,7 +99,7 @@ module BTBPredictor
// *** need to add forwarding.
// *** optimize for byte write enables
ram2p1r1wb #(Depth, `XLEN+5) memory(.clk(clk),
ram2p1r1wb #(Depth, `XLEN+4) memory(.clk(clk),
.reset(reset),
.ra1(LookUpPCIndex),
.rd1({{InstrClass, TargetPC}}),
@ -107,7 +107,7 @@ module BTBPredictor
.wa2(UpdatePCIndex),
.wd2({UpdateInstrClass, UpdateTarget}),
.wen2(UpdateEN),
.bwe2({5'h1F, {`XLEN{1'b1}}})); // *** definitely not right.
.bwe2({4'hF, {`XLEN{1'b1}}})); // *** definitely not right.
endmodule

View File

@ -32,14 +32,19 @@ module RASPredictor
#(parameter int StackSize = 16
)
(input logic clk,
input logic reset,
input logic pop,
output logic [`XLEN-1:0] popPC,
input logic push,
input logic incr,
input logic [`XLEN-1:0] pushPC
input logic reset,
input logic PopF,
output logic [`XLEN-1:0] RASPCF,
input logic [3:0] WrongPredInstrClassD,
input logic [3:0] InstrClassD,
input logic PushE,
input logic incr,
input logic [`XLEN-1:0] PCLinkE
);
// *** need to update so it either doesn't push until the memory stage
// or need to repair flushed push.
// *** need to repair popped and then flushed returns.
logic CounterEn;
localparam Depth = $clog2(StackSize);
@ -47,13 +52,13 @@ module RASPredictor
logic [StackSize-1:0] [`XLEN-1:0] memory;
integer index;
assign CounterEn = pop | push | incr;
assign CounterEn = PopF | PushE | incr | WrongPredInstrClassD[2];
assign PtrD = pop ? PtrM1 : PtrP1;
assign PtrD = PopF | InstrClassD[2] ? PtrM1 : PtrP1;
assign PtrM1 = PtrQ - 1'b1;
assign PtrP1 = PtrQ + 1'b1;
// may have to handle a push and an incr at the same time.
// may have to handle a PushE and an incr at the same time.
// *** what happens if jal is executing and there is a return being flushed in Decode?
flopenr #(Depth) PTR(.clk(clk),
@ -67,12 +72,12 @@ module RASPredictor
if(reset) begin
for(index=0; index<StackSize; index++)
memory[index] <= {`XLEN{1'b0}};
end else if(push) begin
memory[PtrP1] <= #1 pushPC;
end else if(PushE) begin
memory[PtrP1] <= #1 PCLinkE;
end
end
assign popPC = memory[PtrQ];
assign RASPCF = memory[PtrQ];
endmodule

View File

@ -52,7 +52,7 @@ module bpred (
input logic PCSrcE, // Executation stage branch is taken
input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address
input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
output logic [4:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br
// Report branch prediction status
output logic BPPredWrongE, // Prediction is wrong.
@ -65,13 +65,13 @@ module bpred (
logic BTBValidF;
logic [1:0] DirPredictionF;
logic [4:0] BPInstrClassF, BPInstrClassD, BPInstrClassE;
logic [3:0] PredInstrClassF, PredInstrClassD, PredInstrClassE;
logic [`XLEN-1:0] BTBPredPCF, RASPCF;
logic TargetWrongE;
logic FallThroughWrongE;
logic PredictionPCWrongE;
logic PredictionInstrClassWrongE;
logic [4:0] InstrClassD, InstrClassE, InstrClassW;
logic [3:0] InstrClassD, InstrClassE, InstrClassW;
logic DirPredictionWrongE, BTBPredPCWrongE, RASPredPCWrongE, BPPredClassNonCFIWrongE;
logic SelBPPredF;
@ -79,7 +79,8 @@ module bpred (
logic BPPredWrongM;
logic [`XLEN-1:0] PCNext0F;
logic [`XLEN-1:0] PCCorrectE;
logic [3:0] WrongPredInstrClassD;
// Part 1 branch direction prediction
// look into the 2 port Sram model. something is wrong.
if (`BPTYPE == "BPTWOBIT") begin:Predictor
@ -95,7 +96,7 @@ module bpred (
end else if (`BPTYPE == "BPSPECULATIVEGLOBAL") begin:Predictor
speculativeglobalhistory #(10) DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .PCSrcE);
end else if (`BPTYPE == "BPGSHARE") begin:Predictor
@ -106,8 +107,8 @@ module bpred (
end else if (`BPTYPE == "BPSPECULATIVEGSHARE") begin:Predictor
speculativegshare DirPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PCNextF, .PCF, .PCD, .PCE, .PCM, .DirPredictionF, .DirPredictionWrongE,
.BranchInstrF(BPInstrClassF[0]), .BranchInstrD(BPInstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .PCSrcE);
.BranchInstrF(PredInstrClassF[0]), .BranchInstrD(InstrClassD[0]), .BranchInstrE(InstrClassE[0]), .BranchInstrM(InstrClassM[0]),
.BranchInstrW(InstrClassW[0]), .WrongPredInstrClassD, .PCSrcE);
end else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
// *** Fix me
@ -129,10 +130,9 @@ module bpred (
// 1) A direction (1 = Taken, 0 = Not Taken)
// 2) Any information which is necessary for the predictor to build its next state.
// For a 2 bit table this is the prediction count.
assign SelBPPredF = ((BPInstrClassF[0] & DirPredictionF[1] & BTBValidF) |
BPInstrClassF[3] |
(BPInstrClassF[2] & BTBValidF) |
BPInstrClassF[1] & BTBValidF) ;
assign SelBPPredF = (PredInstrClassF[0] & DirPredictionF[1] & BTBValidF) |
PredInstrClassF[2] |
(PredInstrClassF[1] & BTBValidF) ;
// Part 2 Branch target address prediction
// *** For now the BTB will house the direct and indirect targets
@ -143,7 +143,7 @@ module bpred (
.*, // Stalls and flushes
.LookUpPC(PCNextF),
.TargetPC(BTBPredPCF),
.InstrClass(BPInstrClassF),
.InstrClass(PredInstrClassF),
.Valid(BTBValidF),
// update
.UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE),
@ -154,26 +154,28 @@ module bpred (
// Part 3 RAS
// *** need to add the logic to restore RAS on flushes. We will use incr for this.
// *** needs to include flushX
RASPredictor RASPredictor(.clk(clk),
.reset(reset),
.pop(BPInstrClassF[3] & ~StallF),
.popPC(RASPCF),
.push(InstrClassE[4] & ~StallE),
.PopF(PredInstrClassF[2] & ~StallF),
.WrongPredInstrClassD,
.InstrClassD,
.RASPCF,
.PushE(InstrClassE[3] & ~StallE),
.incr(1'b0),
.pushPC(PCLinkE));
.PCLinkE);
assign BPPredPCF = BPInstrClassF[3] ? RASPCF : BTBPredPCF;
assign BPPredPCF = PredInstrClassF[2] ? RASPCF : BTBPredPCF;
// the branch predictor needs a compact decoding of the instruction class.
// *** consider adding in the alternate return address x5 for returns.
assign InstrClassD[4] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or r5
assign InstrClassD[3] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01; // jump register, but not return
assign InstrClassD[1] = InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01; // jump, RD != x1 or x5
assign InstrClassD[3] = (InstrD[6:0] & 7'h77) == 7'h67 & (InstrD[11:07] & 5'h1B) == 5'h01; // jal(r) must link to ra or x5
assign InstrClassD[2] = InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) == 5'h01; // return must return to ra or r5
assign InstrClassD[1] = (InstrD[6:0] == 7'h67 & (InstrD[19:15] & 5'h1B) != 5'h01 & (InstrD[11:7] & 5'h1B) != 5'h01) | // jump register, but not return
(InstrD[6:0] == 7'h6F & (InstrD[11:7] & 5'h1B) != 5'h01); // jump, RD != x1 or x5
assign InstrClassD[0] = InstrD[6:0] == 7'h63; // branch
flopenrc #(5) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE);
flopenrc #(5) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM);
flopenrc #(5) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW);
flopenrc #(4) InstrClassRegE(clk, reset, FlushE, ~StallE, InstrClassD, InstrClassE);
flopenrc #(4) InstrClassRegM(clk, reset, FlushM, ~StallM, InstrClassE, InstrClassM);
flopenrc #(4) InstrClassRegW(clk, reset, FlushW, ~StallW, InstrClassM, InstrClassW);
flopenrc #(1) BPPredWrongMReg(clk, reset, FlushM, ~StallM, BPPredWrongE, BPPredWrongM);
// branch predictor
@ -182,8 +184,8 @@ module bpred (
{DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM});
// pipeline the class
flopenrc #(5) BPInstrClassRegD(clk, reset, FlushD, ~StallD, BPInstrClassF, BPInstrClassD);
flopenrc #(5) BPInstrClassRegE(clk, reset, FlushE, ~StallE, BPInstrClassD, BPInstrClassE);
flopenrc #(4) PredInstrClassRegD(clk, reset, FlushD, ~StallD, PredInstrClassF, PredInstrClassD);
flopenrc #(4) PredInstrClassRegE(clk, reset, FlushE, ~StallE, PredInstrClassD, PredInstrClassE);
// Check the prediction
// first check if the target or fallthrough address matches what was predicted.
@ -201,7 +203,7 @@ module bpred (
// Finally we need to check if the class is wrong. When the class is wrong the BTB needs to be updated.
// Also we want to track this in a performance counter.
assign PredictionInstrClassWrongE = InstrClassE != BPInstrClassE;
assign PredictionInstrClassWrongE = InstrClassE != PredInstrClassE;
// We want to output to the instruction fetch if the PC fetched was wrong. If by chance the predictor was wrong about
// the direction or class, but correct about the target we don't have to flush the pipeline. However we still
@ -209,11 +211,14 @@ module bpred (
assign BPPredWrongE = (PredictionPCWrongE & |InstrClassE) | BPPredClassNonCFIWrongE;
// If we have a jump, jump register or jal or jalr and the PC is wrong we need to increment the performance counter.
assign BTBPredPCWrongE = (InstrClassE[4] | InstrClassE[2] | InstrClassE[1]) & PredictionPCWrongE;
assign BTBPredPCWrongE = (InstrClassE[3] | InstrClassE[1]) & PredictionPCWrongE;
// similar with RAS
assign RASPredPCWrongE = InstrClassE[3] & PredictionPCWrongE;
assign RASPredPCWrongE = InstrClassE[2] & PredictionPCWrongE;
// Finally if the real instruction class is non CFI but the predictor said it was we need to count.
assign BPPredClassNonCFIWrongE = PredictionInstrClassWrongE & ~|InstrClassE;
// branch class prediction wrong.
assign WrongPredInstrClassD = PredInstrClassD ^ InstrClassD;
// Selects the BP or PC+2/4.

View File

@ -56,7 +56,7 @@ module ifu (
output logic [31:0] InstrD, InstrM,
output logic [`XLEN-1:0] PCM,
// branch predictor
output logic [4:0] InstrClassM,
output logic [3:0] InstrClassM,
output logic DirPredictionWrongM,
output logic BTBPredPCWrongM,
output logic RASPredPCWrongM,

View File

@ -0,0 +1,225 @@
///////////////////////////////////////////
// gsharePredictor.sv
//
// Written: Shreya Sanghai
// Email: ssanghai@hmc.edu
// Created: March 16, 2021
// Modified:
//
// Purpose: Global History Branch predictor with parameterized global history register
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// optgshare: gshare-style branch direction predictor with a speculatively updated
// global history register (GHR).
//
// Parameter:
//   k - number of global-history bits; also the width of the table index (2**k entries).
//
// Outputs:
//   DirPredictionF      - 2-bit direction prediction for the Fetch stage; bit [1] is used
//                         as the predicted direction when the history is updated below.
//   DirPredictionWrongE - asserted when the Execute-stage branch outcome (PCSrcE) differs
//                         from the prediction that was pipelined to Execute.
//
// Structure (top to bottom):
//   1. An experimental single (k+5)-bit GHR with a casez "repair" table that removes the
//      history bits of flushed branches, plus per-stage *_temp taps of that register.
//      NOTE(review): in the code visible here, GHR/GHRNext and the *_temp signals do not
//      feed the index logic, the prediction table, or either output.
//   2. Table index generation (history XOR PC bits) and the 2-bit prediction table.
//   3. Forwarding of newer in-flight predictions that alias the index being looked up.
//   4. Pipelines for the prediction, the updated counter value, PCSrc, and the per-stage GHR.
module optgshare
#(parameter int k = 10
)
(input logic clk,
input logic reset,
input logic StallF, StallD, StallE, StallM, StallW,
input logic FlushD, FlushE, FlushM, FlushW,
// input logic [`XLEN-1:0] LookUpPC,
output logic [1:0] DirPredictionF,
output logic DirPredictionWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic PCSrcE
);
logic MatchF, MatchD, MatchE, MatchM, MatchW;
logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW;
// per-stage history: the Fetch copy is k bits, later stages carry one extra bit
logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
logic [k-1:0] GHRNextF;
logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW;
logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE, IndexM, IndexW;
logic PCSrcM, PCSrcW;
logic [`XLEN-1:0] PCW;
logic [1:0] ForwardNewDirPrediction, ForwardDirPredictionF;
// single wide history register: k bits plus 5 extra bits
logic [k+4:0] GHRNext, GHR;
logic GHRUpdateEn;
// the wide GHR is rewritten when a branch is fetched, on an Execute misprediction, or on any flush
assign GHRUpdateEn = BranchInstrF | (DirPredictionWrongE & BranchInstrE) |
FlushD | FlushE | FlushM | FlushW;
// it doesn't work this way. Instead we need to see how many branch instructions are flushed.
// then shift over by that amount.
// RemoveBrX: the branch currently in stage X is being flushed by the flush of the next stage.
// NOTE(review): RemoveBrW and RemoveBrNextF are assigned but not used in the casez below,
// and RemoveBrNextF has the same expression as RemoveBrF.
logic RemoveBrW, RemoveBrM, RemoveBrE, RemoveBrD, RemoveBrF, RemoveBrNextF;
assign RemoveBrW = '0;
assign RemoveBrM = BranchInstrM & FlushW;
assign RemoveBrE = BranchInstrE & FlushM;
assign RemoveBrD = BranchInstrD & FlushE;
assign RemoveBrF = BranchInstrF & FlushD;
assign RemoveBrNextF = BranchInstrF & FlushD;
// Next-state table for the wide GHR.
// Selector bits, MSB first: {BranchInstrF, DirPredictionWrongE, RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM}.
// Every arm produces k+5 bits: flushed-branch bits are dropped and the vector is padded with zeros at the LSB end;
// a new prediction (DirPredictionF[1]) or a corrected outcome (PCSrcE) is inserted at the MSB.
// Combinations not listed fall through to the default and hold the current value.
always_comb begin
casez ({BranchInstrF, DirPredictionWrongE, RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM})
6'b00_0000: GHRNext = GHR; // no change
6'b00_0001: GHRNext = {GHR[k+4:k+1], GHR[k-1:0], 1'b0}; // RemoveBrM
6'b0?_0010: GHRNext = {GHR[k+4:k+2], GHR[k:0], 1'b0}; // RemoveBrE
6'b0?_0011: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrE, RemoveBrM
// NOTE(review): the next arm has the same right-hand side as the 0011 arm above -- confirm this is intended for RemoveBrD alone
6'b00_0100: GHRNext = {GHR[k+4:k+2], GHR[k-1:0], 2'b0}; // RemoveBrD
// NOTE(review): the next arm drops only bit k+2 although two stages are named -- confirm
6'b00_0101: GHRNext = {GHR[k+4:k+3], GHR[k+1:0], 1'b0}; // RemoveBrD, RemoveBrM
6'b0?_0110: GHRNext = {GHR[k+4:k+3], GHR[k+1], GHR[k-1:0], 2'b0}; // RemoveBrD, RemoveBrE
6'b0?_0111: GHRNext = {GHR[k+4:k+3], GHR[k-1:0], 3'b0}; // RemoveBrD, RemoveBrE, RemoveBrM
6'b?0_1000: GHRNext = {GHR[k+2:0], 2'b0}; // RemoveBrF,
6'b?0_1001: GHRNext = {GHR[k+2:k+1], GHR[k-1:0], 3'b0}; // RemoveBrF, RemoveBrM
6'b??_1010: GHRNext = {GHR[k+2], GHR[k:0], 3'b0}; // RemoveBrF, RemoveBrE
6'b??_1011: GHRNext = {GHR[k+2], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrE, RemoveBrM
6'b?0_1100: GHRNext = {GHR[k+1:0], 3'b0}; // RemoveBrF, RemoveBrD
6'b?0_1101: GHRNext = {GHR[k+1], GHR[k-1:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrM
6'b??_1110: GHRNext = {GHR[k:0], 4'b0}; // RemoveBrF, RemoveBrD, RemoveBrE
6'b??_1111: GHRNext = {GHR[k-1:0], 5'b0}; // RemoveBrF, RemoveBrD, RemoveBrE, RemoveBrM
6'b?1_0000: GHRNext = {PCSrcE, GHR[k+3:0]}; // Misprediction, no branches to flush
6'b?1_0001: GHRNext = {PCSrcE, GHR[k+3:k], GHR[k-1:1], 1'b0}; // Misprediction, branch in Memory stage dropped
6'b?1_1100: GHRNext = {PCSrcE, GHR[k+1:0], 2'b00}; // Misprediction, cannot have RemoveBrE
6'b?1_1101: GHRNext = {PCSrcE, GHR[k+1], GHR[k-1:0], 3'b0}; // Misprediction, cannot have RemoveBrE
// branch fetched, prediction not wrong, nothing removed from Fetch: shift in the new prediction at the MSB
6'b10_0000: GHRNext = {DirPredictionF[1], GHR[k+4:1]};
6'b10_0001: GHRNext = {DirPredictionF[1], GHR[k+4:k+1], GHR[k-1:1], 1'b0};
6'b10_0010: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k:1], 1'b0};
6'b10_0011: GHRNext = {DirPredictionF[1], GHR[k+4:k+2], GHR[k-1:1], 2'b0};
6'b10_0100: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1:1], 1'b0};
6'b10_0101: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k+1], GHR[k-1:1], 2'b0};
6'b10_0110: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k], GHR[k-1:1], 2'b0};
6'b10_0111: GHRNext = {DirPredictionF[1], GHR[k+4:k+3], GHR[k-1:1], 3'b0};
default: GHRNext = GHR;
endcase
end
flopenr #(k+5) GHRReg(clk, reset, GHRUpdateEn, GHRNext, GHR);
// Per-stage taps of the wide GHR. Each stage takes its own slice when that stage holds a
// valid instruction; otherwise it takes the value selected for the previous stage.
logic [k-1:0] GHRNextF_temp, GHRF_temp;
logic [k:0] GHRD_temp, GHRE_temp, GHRM_temp, GHRW_temp;
logic GHRFExtra_temp;
// these are also in the ieu controller. should create inputs.
// local instruction-valid pipeline
// (flopenrc arguments assumed to be clk, reset, clear, enable, d, q -- TODO confirm against the flop library)
// NOTE(review): both the F and D valid registers use FlushD as their clear input -- confirm intended
logic InstrValidF, InstrValidD, InstrValidE, InstrValidM, InstrValidW;
flopenrc #(1) InstrValidFReg(clk, reset, FlushD, ~StallF, 1'b1, InstrValidF);
flopenrc #(1) InstrValidDReg(clk, reset, FlushD, ~StallD, InstrValidF, InstrValidD);
flopenrc #(1) InstrValidEReg(clk, reset, FlushE, ~StallE, InstrValidD, InstrValidE);
flopenrc #(1) InstrValidMReg(clk, reset, FlushM, ~StallM, InstrValidE, InstrValidM);
flopenrc #(1) InstrValidWReg(clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
assign GHRNextF_temp = GHRNext[k+4:5];
assign GHRF_temp = InstrValidF ? GHR[k+3:4] : GHRNextF_temp;
assign GHRFExtra_temp = InstrValidF ? 1'b0 : GHR[k+4];
assign GHRD_temp = InstrValidD ? GHR[k+3:3] : {GHRFExtra_temp, GHRF_temp};
assign GHRE_temp = InstrValidE ? GHR[k+2:2] : GHRD_temp;
assign GHRM_temp = InstrValidM ? GHR[k+1:1] : GHRE_temp;
assign GHRW_temp = InstrValidW ? GHR[k:0] : GHRM_temp;
// Table index for each stage: the low k history bits XOR a k-bit hash of that stage's PC.
// The PC hash is {PC[k+1] ^ PC[1], PC[k:2]}.
assign IndexNextF = GHRNextF ^ {PCNextF[k+1] ^ PCNextF[1], PCNextF[k:2]};
assign IndexF = GHRF ^ {PCF[k+1] ^ PCF[1], PCF[k:2]};
assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]};
// Prediction table (presumably 2**k entries of 2 bits each -- confirm ram2p1r1wbefix parameter order).
// Read port: looked up with IndexNextF. Write port: the Writeback-stage branch writes its updated
// counter value at IndexW, unless Writeback is stalled or flushed.
ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallW & ~FlushW),
.ra1(IndexNextF),
.rd1(TableDirPredictionF),
.wa2(IndexW),
.wd2(NewDirPredictionW),
.we2(BranchInstrW & ~StallW & ~FlushW),
.bwe2(1'b1));
// if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage
// and then register for use in the Fetch stage.
// MatchX: stage X holds a branch that is not being flushed and whose table index equals the index being looked up.
// NOTE(review): MatchW has no flush qualifier, unlike the other stages -- confirm intended.
assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF);
assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD);
assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE);
assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM);
assign MatchW = BranchInstrW & (IndexNextF == IndexW);
assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
// priority select: the youngest matching stage wins; the Writeback value is the fall-through choice
assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF :
MatchD ? NewDirPredictionD :
MatchE ? NewDirPredictionE :
MatchM ? NewDirPredictionM :
NewDirPredictionW;
flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF);
// use the registered forwarded value when any stage matched, otherwise the value read from the table
assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF;
// DirPrediction pipeline
flopenr #(2) PredictionRegD(clk, reset, ~StallD, DirPredictionF, DirPredictionD);
flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE);
// New prediction pipeline
// In Fetch the counter is updated with the predicted direction; in Execute it is recomputed from
// the pipelined prediction and the actual outcome PCSrcE. NewDirPredictionD is only used for forwarding.
satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF));
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW);
// PCSrc pipeline
// NOTE(review): PCSrcW is not read anywhere else in this module
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
flopenrc #(1) PCSrcWReg(clk, reset, FlushW, ~StallW, PCSrcM, PCSrcW);
// GHR pipeline
// Each stage register normally takes the value from the stage before it; when that stage is
// flushed it instead reloads from the next-state value of the stage after it.
// In Fetch, a fetched branch shifts its predicted direction in at the MSB.
assign GHRNextF = FlushD ? GHRNextD[k:1] :
BranchInstrF ? {DirPredictionF[1], GHRF[k-1:1]} :
GHRF;
flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF);
assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF};
flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD);
assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);
// in Execute, a branch replaces the top history bit with its actual outcome
assign GHRE = BranchInstrE ? {PCSrcE, OldGHRE[k-1:0]} : OldGHRE;
assign GHRNextM = FlushM ? GHRNextW : GHRE;
flopenr #(k+1) GHRMReg(clk, reset, (~StallM) | FlushM, GHRNextM, GHRM);
assign GHRNextW = FlushW ? GHRW : GHRM;
// the Writeback history only advances for a Memory-stage branch (or is rewritten on FlushW)
flopenr #(k+1) GHRWReg(clk, reset, (BranchInstrM & ~StallW) | FlushW, GHRNextW, GHRW);
// != binds tighter than &, so this is (PCSrcE != DirPredictionE[1]) & BranchInstrE
assign DirPredictionWrongE = PCSrcE != DirPredictionE[1] & BranchInstrE;
// PCW is only needed locally to form IndexW
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
endmodule

View File

@ -40,22 +40,24 @@ module speculativegshare
output logic DirPredictionWrongE,
// update
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic BranchInstrF, BranchInstrD, BranchInstrE, BranchInstrM, BranchInstrW,
input logic [3:0] WrongPredInstrClassD,
input logic PCSrcE
);
logic MatchF, MatchD, MatchE, MatchM, MatchW;
logic MatchF, MatchD, MatchE, MatchM;
logic MatchNextX, MatchXF;
logic [1:0] TableDirPredictionF, DirPredictionD, DirPredictionE;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM, NewDirPredictionW;
logic [1:0] NewDirPredictionF, NewDirPredictionD, NewDirPredictionE, NewDirPredictionM;
logic [k-1:0] GHRF;
logic [k:0] GHRD, OldGHRE, GHRE, GHRM, GHRW;
logic [k-1:0] GHRNextF;
logic [k:0] GHRNextD, GHRNextE, GHRNextM, GHRNextW;
logic [k:-1] GHRNextD, OldGHRD;
logic [k:0] GHRNextE, GHRNextM, GHRNextW;
logic [k-1:0] IndexNextF, IndexF;
logic [k-1:0] IndexD, IndexE, IndexM, IndexW;
logic [k-1:0] IndexD, IndexE, IndexM;
logic PCSrcM, PCSrcW;
logic [`XLEN-1:0] PCW;
@ -67,34 +69,31 @@ module speculativegshare
assign IndexD = GHRD[k-1:0] ^ {PCD[k+1] ^ PCD[1], PCD[k:2]};
assign IndexE = GHRE[k-1:0] ^ {PCE[k+1] ^ PCE[1], PCE[k:2]};
assign IndexM = GHRM[k-1:0] ^ {PCM[k+1] ^ PCM[1], PCM[k:2]};
assign IndexW = GHRW[k-1:0] ^ {PCW[k+1] ^ PCW[1], PCW[k:2]};
ram2p1r1wbefix #(2**k, 2) PHT(.clk(clk),
.ce1(~StallF | reset), .ce2(~StallW & ~FlushW),
.ra1(IndexNextF),
.rd1(TableDirPredictionF),
.wa2(IndexW),
.wd2(NewDirPredictionW),
.we2(BranchInstrW & ~StallW & ~FlushW),
.wa2(IndexM),
.wd2(NewDirPredictionM),
.we2(BranchInstrM & ~StallW & ~FlushW),
.bwe2(1'b1));
// if there are non-flushed branches in the pipeline we need to forward the prediction from that stage to the NextF demi stage
// and then register for use in the Fetch stage.
// and then register for use in the Fetch stage.
assign MatchF = BranchInstrF & ~FlushD & (IndexNextF == IndexF);
assign MatchD = BranchInstrD & ~FlushE & (IndexNextF == IndexD);
assign MatchE = BranchInstrE & ~FlushM & (IndexNextF == IndexE);
assign MatchM = BranchInstrM & ~FlushW & (IndexNextF == IndexM);
assign MatchW = BranchInstrW & (IndexNextF == IndexW);
assign MatchNextX = MatchF | MatchD | MatchE | MatchM | MatchW;
assign MatchNextX = MatchF | MatchD | MatchE | MatchM;
flopenr #(1) MatchReg(clk, reset, ~StallF, MatchNextX, MatchXF);
assign ForwardNewDirPrediction = MatchF ? NewDirPredictionF :
MatchD ? NewDirPredictionD :
MatchE ? NewDirPredictionE :
MatchM ? NewDirPredictionM :
NewDirPredictionW;
NewDirPredictionM;
flopenr #(2) ForwardDirPredicitonReg(clk, reset, ~StallF, ForwardNewDirPrediction, ForwardDirPredictionF);
assign DirPredictionF = MatchXF ? ForwardDirPredictionF : TableDirPredictionF;
@ -104,11 +103,11 @@ module speculativegshare
flopenr #(2) PredictionRegE(clk, reset, ~StallE, DirPredictionD, DirPredictionE);
// New prediction pipeline
satCounter2 BPDirUpdateF(.BrDir(DirPredictionF[1]), .OldState(DirPredictionF), .NewState(NewDirPredictionF));
assign NewDirPredictionF = {DirPredictionF[1], DirPredictionF[1]};
flopenr #(2) NewPredDReg(clk, reset, ~StallD, NewDirPredictionF, NewDirPredictionD);
satCounter2 BPDirUpdateE(.BrDir(PCSrcE), .OldState(DirPredictionE), .NewState(NewDirPredictionE));
flopenr #(2) NewPredMReg(clk, reset, ~StallM, NewDirPredictionE, NewDirPredictionM);
flopenr #(2) NewPredWReg(clk, reset, ~StallW, NewDirPredictionM, NewDirPredictionW);
// PCSrc pipeline
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
@ -121,8 +120,11 @@ module speculativegshare
flopenr #(k) GHRFReg(clk, reset, (~StallF) | FlushD, GHRNextF, GHRF);
assign GHRNextD = FlushD ? GHRNextE : {DirPredictionF[1], GHRF};
flopenr #(k+1) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, GHRD);
assign GHRNextD = FlushD ? {GHRNextE, GHRNextE[0]} : {DirPredictionF[1], GHRF, GHRF[0]};
flopenr #(k+2) GHRDReg(clk, reset, (~StallD) | FlushD, GHRNextD, OldGHRD);
assign GHRD = WrongPredInstrClassD[0] & BranchInstrD ? {DirPredictionD[1], OldGHRD[k:1]} : // shift right
WrongPredInstrClassD[0] & ~BranchInstrD ? OldGHRD[k-2:-1] : // shift left
OldGHRD;
assign GHRNextE = FlushE ? GHRNextM : GHRD;
flopenr #(k+1) GHREReg(clk, reset, (~StallE) | FlushE, GHRNextE, OldGHRE);

View File

@ -61,7 +61,7 @@ module csr #(parameter
input logic BTBPredPCWrongM,
input logic RASPredPCWrongM,
input logic PredictionInstrClassWrongM,
input logic [4:0] InstrClassM,
input logic [3:0] InstrClassM,
input logic DCacheMiss,
input logic DCacheAccess,
input logic ICacheMiss,

View File

@ -47,7 +47,7 @@ module csrc #(parameter
input logic BTBPredPCWrongM,
input logic RASPredPCWrongM,
input logic PredictionInstrClassWrongM,
input logic [4:0] InstrClassM,
input logic [3:0] InstrClassM,
input logic DCacheMiss,
input logic DCacheAccess,
input logic ICacheMiss,
@ -87,9 +87,9 @@ module csrc #(parameter
assign CounterEvent[4] = DirPredictionWrongM & InstrValidNotFlushedM;
assign CounterEvent[5] = InstrClassM[0] & InstrValidNotFlushedM;
assign CounterEvent[6] = BTBPredPCWrongM & InstrValidNotFlushedM;
assign CounterEvent[7] = (InstrClassM[4] | InstrClassM[2] | InstrClassM[1]) & InstrValidNotFlushedM;
assign CounterEvent[7] = (InstrClassM[3] | InstrClassM[1]) & InstrValidNotFlushedM;
assign CounterEvent[8] = RASPredPCWrongM & InstrValidNotFlushedM;
assign CounterEvent[9] = InstrClassM[3] & InstrValidNotFlushedM;
assign CounterEvent[9] = InstrClassM[2] & InstrValidNotFlushedM;
assign CounterEvent[10] = PredictionInstrClassWrongM & InstrValidNotFlushedM;
assign CounterEvent[11] = DCacheAccess;
assign CounterEvent[12] = DCacheMiss;

View File

@ -50,7 +50,7 @@ module privileged (
input logic BTBPredPCWrongM, // branch predictor guessed wrong target
input logic RASPredPCWrongM, // return adddress stack guessed wrong target
input logic PredictionInstrClassWrongM, // branch predictor guessed wrong instruction class
input logic [4:0] InstrClassM, // actual instruction class
input logic [3:0] InstrClassM, // actual instruction class
input logic DCacheMiss, // data cache miss
input logic DCacheAccess, // data cache accessed (hit or miss)
input logic ICacheMiss, // instruction cache miss

View File

@ -146,7 +146,7 @@ module wallypipelinedcore (
logic BTBPredPCWrongM;
logic RASPredPCWrongM;
logic PredictionInstrClassWrongM;
logic [4:0] InstrClassM;
logic [3:0] InstrClassM;
logic InstrAccessFaultF, HPTWInstrAccessFaultM;
logic [2:0] LSUHSIZE;
logic [2:0] LSUHBURST;

View File

@ -0,0 +1,251 @@
`include "wally-config.vh"
`define NUM_REGS 32
`define NUM_CSRS 4096
`define PRINT_PC_INSTR 1
`define PRINT_MOST 1
`define PRINT_ALL 0
module rvviTrace #(
parameter int ILEN = `XLEN, // Instruction length in bits
parameter int XLEN = `XLEN, // GPR length in bits
parameter int FLEN = `FLEN, // FPR length in bits
parameter int VLEN = 0, // Vector register size in bits
parameter int NHART = 1, // Number of harts reported
parameter int RETIRE = 1) // Number of instructions that can retire during valid event
();
localparam NUMREGS = `E_SUPPORTED ? 16 : 32;
// wally specific signals
logic reset;
logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM, PCW;
logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
logic InstrValidM, InstrValidW;
logic StallE, StallM, StallW;
logic FlushD, FlushE, FlushM, FlushW;
logic TrapM, TrapW;
logic IntrF, IntrD, IntrE, IntrM, IntrW;
logic HaltM, HaltW;
logic [1:0] PrivilegeModeW;
logic [`XLEN-1:0] rf[NUMREGS];
logic [NUMREGS-1:0] rf_wb;
logic [4:0] rf_a3;
logic rf_we3;
logic [`XLEN-1:0] frf[32];
logic [`NUM_REGS-1:0] frf_wb;
logic [4:0] frf_a4;
logic frf_we4;
logic [`XLEN-1:0] CSRArray [logic[11:0]];
logic CSRWriteM, CSRWriteW;
logic [11:0] CSRAdrM, CSRAdrW;
// tracer signals
logic clk;
logic valid;
logic [63:0] order [(NHART-1):0][(RETIRE-1):0];
logic [ILEN-1:0] insn [(NHART-1):0][(RETIRE-1):0];
logic intr [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_rdata [(NHART-1):0][(RETIRE-1):0];
logic [(XLEN-1):0] pc_wdata [(NHART-1):0][(RETIRE-1):0];
logic trap [(NHART-1):0][(RETIRE-1):0];
logic halt [(NHART-1):0][(RETIRE-1):0];
logic [1:0] mode [(NHART-1):0][(RETIRE-1):0];
logic [1:0] ixl [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] x_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] x_wb [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0][(XLEN-1):0] f_wdata [(NHART-1):0][(RETIRE-1):0];
logic [`NUM_REGS-1:0] f_wb [(NHART-1):0][(RETIRE-1):0];
logic [4095:0][(XLEN-1):0] csr [(NHART-1):0][(RETIRE-1):0];
logic [4095:0] csr_wb [(NHART-1):0][(RETIRE-1):0];
logic lrsc_cancel[(NHART-1):0][(RETIRE-1):0];
assign clk = testbench.dut.clk;
// assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet
assign InstrValidD = testbench.dut.core.ieu.c.InstrValidD;
assign InstrValidE = testbench.dut.core.ieu.c.InstrValidE;
assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
assign InstrRawD = testbench.dut.core.ifu.InstrRawD;
assign PCNextF = testbench.dut.core.ifu.PCNextF;
assign PCF = testbench.dut.core.ifu.PCF;
assign PCD = testbench.dut.core.ifu.PCD;
assign PCE = testbench.dut.core.ifu.PCE;
assign PCM = testbench.dut.core.ifu.PCM;
assign reset = testbench.reset;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign StallW = testbench.dut.core.StallW;
assign FlushD = testbench.dut.core.FlushD;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign FlushW = testbench.dut.core.FlushW;
assign TrapM = testbench.dut.core.TrapM;
assign HaltM = testbench.DCacheFlushStart;
assign PrivilegeModeW = testbench.dut.core.priv.priv.privmode.PrivilegeModeW;
assign STATUS_SXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_SXL;
assign STATUS_UXL = testbench.dut.core.priv.priv.csr.csrsr.STATUS_UXL;
always_comb begin
// machine CSRs
// *** missing PMP and performance counters.
CSRArray[12'h300] = testbench.dut.core.priv.priv.csr.csrm.MSTATUS_REGW;
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrm.MSTATUSH_REGW;
CSRArray[12'h305] = testbench.dut.core.priv.priv.csr.csrm.MTVEC_REGW;
CSRArray[12'h341] = testbench.dut.core.priv.priv.csr.csrm.MEPC_REGW;
CSRArray[12'h306] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTEREN_REGW;
CSRArray[12'h320] = testbench.dut.core.priv.priv.csr.csrm.MCOUNTINHIBIT_REGW;
CSRArray[12'h302] = testbench.dut.core.priv.priv.csr.csrm.MEDELEG_REGW;
CSRArray[12'h303] = testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
CSRArray[12'h344] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW;
CSRArray[12'h304] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW;
CSRArray[12'h301] = testbench.dut.core.priv.priv.csr.csrm.MISA_REGW;
CSRArray[12'hF14] = testbench.dut.core.priv.priv.csr.csrm.MHARTID_REGW;
CSRArray[12'h340] = testbench.dut.core.priv.priv.csr.csrm.MSCRATCH_REGW;
CSRArray[12'h342] = testbench.dut.core.priv.priv.csr.csrm.MCAUSE_REGW;
CSRArray[12'h343] = testbench.dut.core.priv.priv.csr.csrm.MTVAL_REGW;
CSRArray[12'hF11] = 0;
CSRArray[12'hF12] = 0;
CSRArray[12'hF13] = `XLEN'h100;
CSRArray[12'hF15] = 0;
CSRArray[12'h34A] = 0;
// MCYCLE and MINSTRET
CSRArray[12'hB00] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[0];
CSRArray[12'hB02] = testbench.dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[2];
// supervisor CSRs
CSRArray[12'h100] = testbench.dut.core.priv.priv.csr.csrs.SSTATUS_REGW;
CSRArray[12'h104] = testbench.dut.core.priv.priv.csr.csrm.MIE_REGW & 12'h222;
CSRArray[12'h105] = testbench.dut.core.priv.priv.csr.csrs.STVEC_REGW;
CSRArray[12'h141] = testbench.dut.core.priv.priv.csr.csrs.SEPC_REGW;
CSRArray[12'h106] = testbench.dut.core.priv.priv.csr.csrs.SCOUNTEREN_REGW;
CSRArray[12'h180] = testbench.dut.core.priv.priv.csr.csrs.SATP_REGW;
CSRArray[12'h140] = testbench.dut.core.priv.priv.csr.csrs.csrs.SSCRATCH_REGW;
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
// user CSRs
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.FRM_REGW;
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
end
genvar index;
assign rf[0] = '0;
for(index = 1; index < NUMREGS; index += 1)
assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index];
assign rf_a3 = testbench.dut.core.ieu.dp.regf.a3;
assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3;
always_comb begin
rf_wb <= '0;
if(rf_we3)
rf_wb[rf_a3] <= 1'b1;
end
for(index = 0; index < NUMREGS; index += 1)
assign frf[index] = testbench.dut.core.fpu.fpu.fregfile.rf[index];
assign frf_a4 = testbench.dut.core.fpu.fpu.fregfile.a4;
assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4;
always_comb begin
frf_wb <= '0;
if(frf_we4)
frf_wb[frf_a4] <= 1'b1;
end
assign CSRAdrM = testbench.dut.core.priv.priv.csr.CSRAdrM;
assign CSRWriteM = testbench.dut.core.priv.priv.csr.CSRWriteM;
// pipeline to writeback stage
flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);
flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW);
flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW);
flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF);
flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD);
flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE);
flopenrc #(1) IntrMReg (clk, reset, FlushM, ~StallM, IntrE, IntrM);
flopenrc #(1) IntrWReg (clk, reset, FlushW, ~StallW, IntrM, IntrW);
flopenrc #(12) CSRAdrWReg (clk, reset, FlushW, ~StallW, CSRAdrM, CSRAdrW);
flopenrc #(1) CSRWriteWReg (clk, reset, FlushW, ~StallW, CSRWriteM, CSRWriteW);
// Initially connecting the writeback stage signals, but may need to use M stage
// and gate on ~FlushW.
assign valid = InstrValidW & ~StallW & ~FlushW;
assign order[0][0] = CSRArray[12'hB02];
assign insn[0][0] = InstrRawW;
assign pc_rdata[0][0] = PCW;
assign trap[0][0] = TrapW;
assign halt[0][0] = HaltW;
assign intr[0][0] = IntrW;
assign mode[0][0] = PrivilegeModeW;
assign ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 :
PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL;
assign pc_wdata[0][0] = ~FlushW ? PCM :
~FlushM ? PCE :
~FlushE ? PCD :
~FlushD ? PCF : PCNextF;
for(index = 0; index < `NUM_REGS; index += 1) begin
assign x_wdata[0][0][index] = rf[index];
assign x_wb[0][0][index] = rf_wb[index];
assign f_wdata[0][0][index] = frf[index];
assign f_wb[0][0][index] = frf_wb[index];
end
always_comb begin
csr_wb[0][0] <= '0;
if(CSRWriteW)
csr_wb[0][0][CSRAdrW] <= 1'b1;
end
integer index3;
always_comb begin
for(index3 = 0; index3 < `NUM_CSRS; index3 += 1) begin
if(CSRArray.exists(index3))
csr[0][0][index3] = CSRArray[index3];
else
csr[0][0][index3] = '0;
end
end
// *** implementation only cancel? so sc does not clear?
assign lrsc_cancel[0][0] = '0;
integer index2;
always_ff @(posedge clk) begin
if(valid) begin
if(`PRINT_PC_INSTR & !(`PRINT_ALL | `PRINT_MOST))
$display("order = %08d, PC = %08x, insn = %08x", order[0][0], pc_rdata[0][0], insn[0][0]);
else if(`PRINT_MOST & !`PRINT_ALL)
$display("order = %08d, PC = %010x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %010x, x%02d = %016x, f%02d = %016x, csr%03x = %016x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0], rf_a3, x_wdata[0][0][rf_a3], frf_a4, f_wdata[0][0][frf_a4], CSRAdrW, csr[0][0][CSRAdrW]);
else if(`PRINT_ALL) begin
$display("order = %08d, PC = %08x, insn = %08x, trap = %1d, halt = %1d, intr = %1d, mode = %1x, ixl = %1x, pc_wdata = %08x",
order[0][0], pc_rdata[0][0], insn[0][0], trap[0][0], halt[0][0], intr[0][0], mode[0][0], ixl[0][0], pc_wdata[0][0]);
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("x%02d = %08x", index2, x_wdata[0][0][index2]);
end
for(index2 = 0; index2 < `NUM_REGS; index2 += 1) begin
$display("f%02d = %08x", index2, f_wdata[0][0][index2]);
end
end
end
if(HaltW) $stop();
end
endmodule

View File

@ -510,7 +510,6 @@ logic [3:0] dummy;
end
end
endmodule
module riscvassertions;
@ -692,3 +691,56 @@ task automatic updateProgramAddrLabelArray;
$fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP);
endtask
`define NUM_REGS 32
`define NUM_CSRS 4096
// rvviTrace: minimal retirement tracer.
// Taps PC, raw instruction and valid/stall/flush signals inside testbench.dut by
// hierarchical reference, carries them to the writeback stage through a chain of
// flopenrc registers, and prints one "PC = ..., insn = ..." line per retired instruction.
module rvviTrace();
  // wally specific signals
  logic reset;
  logic [`XLEN-1:0] PCM, PCW;
  logic [`XLEN-1:0] InstrRawD, InstrRawE, InstrRawM, InstrRawW;
  logic             InstrValidM, InstrValidW;
  logic             StallE, StallM, StallW;
  logic             FlushE, FlushM, FlushW;
  // tracer signals
  logic             clk;
  logic             valid;      // high for exactly the cycles in which a trace line is printed
  logic [`XLEN-1:0] insn;       // raw instruction word in the writeback stage
  logic [`XLEN-1:0] pc_rdata;   // PC of the instruction in the writeback stage

  // hierarchical taps into the device under test
  assign clk         = testbench.dut.clk;
  assign InstrValidM = testbench.dut.core.ieu.InstrValidM;
  assign InstrRawD   = testbench.dut.core.ifu.InstrRawD;
  assign PCM         = testbench.dut.core.ifu.PCM;
  assign reset       = testbench.reset;
  assign StallE      = testbench.dut.core.StallE;
  assign StallM      = testbench.dut.core.StallM;
  assign StallW      = testbench.dut.core.StallW;
  assign FlushE      = testbench.dut.core.FlushE;
  assign FlushM      = testbench.dut.core.FlushM;
  assign FlushW      = testbench.dut.core.FlushW;

  // pipeline to writeback stage
  // Port order used here is (clk, reset, clear, enable, d, q); flopenrc itself is
  // defined elsewhere, so that reading is an assumption -- TODO confirm.
  flopenrc #(`XLEN) InstrRawEReg (clk, reset, FlushE, ~StallE, InstrRawD, InstrRawE);
  flopenrc #(`XLEN) InstrRawMReg (clk, reset, FlushM, ~StallM, InstrRawE, InstrRawM);
  flopenrc #(`XLEN) InstrRawWReg (clk, reset, FlushW, ~StallW, InstrRawM, InstrRawW);
  flopenrc #(`XLEN) PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW);
  flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW);

  // The W-stage registers above are only enabled when ~StallW, so while StallW is high
  // InstrValidW/PCW/InstrRawW keep presenting the same instruction. Qualify valid with
  // ~StallW so that a stalled instruction is reported once (on the cycle the stall
  // ends) instead of once per stalled clock edge.
  assign valid    = InstrValidW & ~StallW;
  assign insn     = InstrRawW;
  assign pc_rdata = PCW;

  always_ff @(posedge clk) begin
    if(valid) begin
      $display("PC = %x, insn = %x", pc_rdata, insn);
    end
  end
endmodule

View File

@ -0,0 +1,439 @@
///////////////////////////////////////////
// testbench.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the riscv-arch-test and Imperas suites
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// testbench: top-level simulation wrapper for the Imperas flow.
// Loads one hard-coded test program into the DUT RAM, generates the clock, holds
// reset_ext for a few clock cycles, instantiates the DUT (wallypipelinedsoc) plus the
// tracing/helper modules, detects the end-of-test condition (DCacheFlushStart) and
// stops the simulation if the writeback PC stops changing (watchdog).
module testbench;
// DEBUG == 1 additionally instantiates the FunctionName tracker below.
parameter DEBUG=0;
logic clk;
logic reset_ext, reset;
logic [`XLEN-1:0] testadr, testadrNoBase;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
logic [3:0] dummy;
// external AHB bus signals between the DUT and the optional external memory
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [`PA_BITS-1:0] HADDR;
logic [`AHBW-1:0] HWDATA;
logic [`XLEN/8-1:0] HWSTRB;
logic HWRITE;
logic [2:0] HSIZE;
logic [2:0] HBURST;
logic [3:0] HPROT;
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
string ProgramAddrMapFile, ProgramLabelMapFile;
// labels of interest; updateProgramAddrLabelArray replaces the 0 placeholders with addresses
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
logic DCacheFlushDone, DCacheFlushStart;
string testName;
string memfilename, pathname, adrstr;
// peripheral pins
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
logic SDCCLK;
logic SDCCmdIn;
logic SDCCmdOut;
logic SDCCmdOE;
logic [3:0] SDCDatIn;
tri1 [3:0] SDCDat;
tri1 SDCCmd;
logic HREADY;
logic HSELEXT;
logic InitializingMemories;
// reset sequencing state, advanced in the negedge clk block below
integer ResetCount, ResetThreshold;
logic InReset;
// Imperas look here.
// One-time setup: start in reset, pick the test, load its memfile into the DUT RAM
// and resolve the addresses of the labels listed in ProgramAddrLabelArray.
initial
begin
ResetCount = 0;
ResetThreshold = 2;
InReset = 1;
testadr = 0;
testadrNoBase = 0;
//testName = "rv64i_m/I/src/add-01.S";
testName = "rv64i_m/privilege/src/WALLY-mmu-sv48-01.S";
//pathname = "../../tests/riscof/work/riscv-arch-test/";
pathname = "../../tests/riscof/work/wally-riscv-arch-test/";
memfilename = {pathname, testName, "/ref/ref.elf.memfile"};
// the memfile is loaded straight into the uncore RAM, which only exists when `BUS is set
if (`BUS) $readmemh(memfilename, dut.uncore.uncore.ram.ram.memory.RAM);
else $error("Imperas test bench requires BUS.");
ProgramAddrMapFile = {pathname, testName, "/ref/ref.elf.objdump.addr"};
ProgramLabelMapFile = {pathname, testName, "/ref/ref.elf.objdump.lab"};
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initialize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
end
// instruction tracer; it reads its inputs by hierarchical reference, hence no ports
rvviTrace rvviTrace();
// Writeback-stage copies of the M-stage PC and instruction, gated by ~StallW
// (presumably the flop enable; flopenr is defined elsewhere -- TODO confirm).
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
// check assertions for a legal configuration
riscvassertions riscvassertions();
// instantiate device to be tested
assign GPIOPinsIn = 0;
assign UARTSin = 1;
// external memory on the AHB bus, or constant "ready / no error / zero data" when absent
if(`EXT_MEM_SUPPORTED) begin
ram_ahb #(.BASE(`EXT_MEM_BASE), .RANGE(`EXT_MEM_RANGE))
ram (.HCLK, .HRESETn, .HADDR, .HWRITE, .HTRANS, .HWDATA, .HSELRam(HSELEXT),
.HREADRam(HRDATAEXT), .HREADYRam(HREADYEXT), .HRESPRam(HRESPEXT), .HREADY,
.HWSTRB);
end else begin
assign HREADYEXT = 1;
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
end
// SD card model when `FPGA is set; otherwise the SD lines are driven to 0
if(`FPGA) begin : sdcard
sdModel sdcard
(.sdClk(SDCCLK),
.cmd(SDCCmd),
.dat(SDCDat));
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
assign SDCCmdIn = SDCCmd;
assign SDCDatIn = SDCDat;
end else begin
assign SDCCmd = '0;
assign SDCDat = '0;
end
wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT,
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
dut.core.ifu.FinalInstrRawF[31:0],
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
dut.core.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// generate clock: high for 5 time units, low for 5 time units
always
begin
clk = 1; # 5; clk = 0; # 5;
// if ($time % 100000 == 0) $display("Time is %0t", $time);
end
// reset sequencing: reset_ext follows InReset, which is cleared on the first falling
// clock edge at which ResetCount has already reached ResetThreshold
assign reset_ext = InReset;
always @(negedge clk)
begin
InitializingMemories = 0;
if(InReset == 1) begin
// keep reset asserted until ResetCount reaches ResetThreshold
if(ResetCount < ResetThreshold) ResetCount = ResetCount + 1;
else begin // hit reset threshold so we remove reset.
InReset = 0;
ResetCount = 0;
end
end
end // always @ (negedge clk)
// track the current function or global label
if (DEBUG == 1) begin : FunctionName
FunctionName FunctionName(.reset(reset),
.clk(clk),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// Termination condition
// terminate on a specific ECALL after li x3,1 for old Imperas tests, *** remove this when old imperas tests are removed
// or sw gp,-56(t0) for new Imperas tests
// or sd gp, -56(t0)
// or on a jump to self infinite loop (6f) for RISC-V Arch tests
logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
else assign ecf = 0;
// DCacheFlushStart is the OR of three terms (& binds tighter than |):
//   1. an ecall fault while x3 holds 1, or while 1 is being written to x3
//   2. a valid M-stage instruction equal to 32'h6f, 32'hfc32a423 or 32'hfc32a823
//   3. an "SW" in the M stage whose address equals the "tohost" label
assign DCacheFlushStart = ecf &
(dut.core.ieu.dp.regf.rf[3] == 1 |
(dut.core.ieu.dp.regf.we3 &
dut.core.ieu.dp.regf.a3 == 3 &
dut.core.ieu.dp.regf.wd3 == 1)) |
((dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM ) |
((dut.core.lsu.IEUAdrM == ProgramAddrLabelArray["tohost"]) & InstrMName == "SW" );
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
.reset(reset),
.start(DCacheFlushStart),
.done(DCacheFlushDone));
// initialize the branch predictor
if (`BPRED_ENABLED == 1)
begin
genvar adrindex;
// Initializing all zeroes into the branch predictor memory.
// Each of the 1024 entries is forced to 0 at time 0 and released one time unit later.
for(adrindex = 0; adrindex < 1024; adrindex++) begin
initial begin
force dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex] = 0;
force dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex] = 0;
#1;
release dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem[adrindex];
release dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem[adrindex];
end
end
end
// check for hang up: count consecutive clock edges on which PCW did not change
logic [`XLEN-1:0] OldPCW;
integer WatchDogTimerCount;
localparam WatchDogTimerThreshold = 1000000;
logic WatchDogTimeOut;
// NOTE(review): WatchDogTimerCount has no explicit initial value; it is first written
// by the else branch below -- confirm this is the intended start-up behavior.
always_ff @(posedge clk) begin
OldPCW <= PCW;
if(OldPCW == PCW) WatchDogTimerCount = WatchDogTimerCount + 1'b1;
else WatchDogTimerCount = '0;
end
// stop the simulation once the count reaches the threshold
always_comb begin
WatchDogTimeOut = WatchDogTimerCount >= WatchDogTimerThreshold;
if(WatchDogTimeOut) begin
$display("FAILURE: Watch Dog Time Out triggered. PCW stuck at %x for more than %d cycles", PCW, WatchDogTimerCount);
$stop;
end
end
endmodule
// riscvassertions: checks at time 0 that the configuration macros describe a legal
// combination of features. Every check is an immediate assertion; a violated check
// reports through $error (or $warning for the RAM-size check) and does not by itself
// stop the simulation.
module riscvassertions;
  initial begin
    assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
    assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
    assert (`IDIV_BITSPERCYCLE == 1 | `IDIV_BITSPERCYCLE==2 | `IDIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: IDIV_BITSPERCYCLE must be 1, 2, or 4");
    // Logical negation (!) is used on the feature macros rather than bitwise ~:
    // ~ applied to a multi-bit constant such as 1 yields a non-zero value, which
    // would make the check pass unconditionally.
    assert (`F_SUPPORTED | !`D_SUPPORTED) else $error("Can't support double fp (D) without supporting float (F)");
    assert (`D_SUPPORTED | !`Q_SUPPORTED) else $error("Can't support quad fp (Q) without supporting double (D)");
    assert (`F_SUPPORTED | !`ZFH_SUPPORTED) else $error("Can't support half-precision fp (ZFH) without supporting float (F)");
    assert (`DCACHE | !`F_SUPPORTED | `FLEN <= `XLEN) else $error("Data cache required to support FLEN > XLEN because AHB bus width is XLEN");
    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
    assert (`FLEN<=`XLEN | `DCACHE | `DTIM_SUPPORTED) else $error("Wally does not support FLEN > XLEN unless data cache or DTIM is supported");
    // cache geometry
    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (!`DCACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`DCACHE_LINELENINBITS >= 128 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
    assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (!`ICACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`ICACHE_LINELENINBITS >= 32 | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
    assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
    // power-of-2 checks: 2**$clog2(x) equals x only when x is a power of 2
    assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (!`DCACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
    assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (!`ICACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (!`ICACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");
    assert (`UNCORE_RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if UNCORE_RAM_RANGE is less than 56'h07FFFFFF");
    // privilege / CSR dependencies
    assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
    assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported");
    assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
    // memory system dependencies
    assert (`VIRTMEM_SUPPORTED == 0 | (`DTIM_SUPPORTED == 0 & `IROM_SUPPORTED == 0)) else $error("Can't simultaneously have virtual memory and DTIM_SUPPORTED/IROM_SUPPORTED because local memories don't translate addresses");
    assert (`DCACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
    assert (`ICACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
    assert ((`DCACHE == 0 & `ICACHE == 0) | `BUS) else $error("Dcache and Icache require BUS.");
    // the limit checked here is 16 words of XLEN bits per line
    assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (!`DCACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 16");
    // NOTE(review): this only checks that the line length in bits is a multiple of 4,
    // which is weaker than what the message states -- confirm the intended condition.
    assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
    assert (`DCACHE | `A_SUPPORTED == 0) else $error("Atomic extension (A) requires cache on Wally.");
    assert (`IDIV_ON_FPU == 0 | `F_SUPPORTED) else $error("IDIV on FPU needs F_SUPPORTED");
  end
  // *** DH 8/23/
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
// DCacheFlushFSM: testbench-only model of a data cache flush.
// When start is high at a rising clock edge, every valid and dirty cache line is
// copied from the data cache arrays of testbench.dut into ShadowRAM. done is start
// passed through the doneReg flop (presumably a one-cycle delay; flop is defined
// elsewhere -- TODO confirm). The reset input is not used inside this module.
module DCacheFlushFSM
(input logic clk,
input logic reset,
input logic start,
output logic done);
genvar adr;
// Shadow copy of main memory, indexed by XLEN-bit word. Both bounds are shifted right
// by (1 + XLEN/32): in the upper bound + binds tighter than >>, so it matches the lower one.
// NOTE(review): nothing in this module reads ShadowRAM; presumably it is read by
// hierarchical reference from elsewhere -- confirm.
logic [`XLEN-1:0] ShadowRAM[`UNCORE_RAM_BASE>>(1+`XLEN/32):(`UNCORE_RAM_RANGE+`UNCORE_RAM_BASE)>>1+(`XLEN/32)];
if(`DCACHE) begin
// cache geometry, read from the parameters of the instantiated data cache
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer linelen = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN;
localparam integer sramlen = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].SRAMLEN;
localparam integer cachesramwords = testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[0].NUMSRAM;
//testbench.dut.core.lsu.bus.dcache.dcache.CacheWays.NUMSRAM;
// number of XLEN-bit words in one SRAM word
localparam integer numwords = sramlen/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen);
localparam integer lognumways = $clog2(numways);
// bit position where the tag starts within a physical address
localparam integer tagstart = lognumlines + loglinebytelen;
genvar index, way, cacheWord;
// snapshot arrays, one entry per (way, line, SRAM word), filled by the copyShadow instances
logic [sramlen-1:0] CacheData [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [sramlen-1:0] cacheline;
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [cachesramwords-1:0];
// one copyShadow per (line, way, SRAM word): captures tag/valid/dirty/data and the
// reconstructed address on the clock edge where start is high
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < cachesramwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen), .sramlen(sramlen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS-1-tagstart:0]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
// these dirty bit selections would be needed if dirty is moved inside the tag array.
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]),
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][`PA_BITS+tagstart]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.RAM[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
// loop indices: i = line, j = way, l = SRAM word, k = XLEN-bit word within the SRAM word
integer i, j, k, l;
always @(posedge clk) begin
// The two #1 delays let the copyShadow instances, which sample on this same clock
// edge, update the snapshot arrays before they are read here.
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(l = 0; l < cachesramwords; l++) begin
// only valid and dirty entries are written to ShadowRAM
if (CacheValid[j][i][l] & CacheDirty[j][i][l]) begin
for(k = 0; k < numwords; k++) begin
//cacheline = CacheData[j][i][0];
// does not work with modelsim
// # ** Error: ../testbench/testbench.sv(483): Range must be bounded by constant expressions.
// see https://verificationacademy.com/forums/systemverilog/range-must-be-bounded-constant-expressions
//ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = cacheline[`XLEN*(k+1)-1:`XLEN*k];
// byte address -> XLEN-word index, plus k to step through the SRAM word;
// the indexed part-select (+:) is used instead of the range that modelsim rejects above
ShadowRAM[(CacheAdr[j][i][l] >> $clog2(`XLEN/8)) + k] = CacheData[j][i][l][`XLEN*k +: `XLEN];
end
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule
// copyShadow: snapshot of one SRAM word of one cache line.
// On each rising clock edge where start is high, latches the tag, valid, dirty and
// data inputs and rebuilds the byte address of the word from
// tag, line index and word position. Outputs keep their value while start is low.
//   tagstart       - bit position of the tag within the address
//   loglinebytelen - shift applied to the line index
//   sramlen        - width of one SRAM word in bits
module copyShadow
  #(parameter tagstart,
    parameter loglinebytelen,
    parameter sramlen)
  (input  logic                       clk,
   input  logic                       start,
   input  logic [`PA_BITS-1:tagstart] tag,
   input  logic                       valid, dirty,
   input  logic [sramlen-1:0]         data,
   input  logic [32-1:0]              index,
   input  logic [32-1:0]              cacheWord,
   output logic [sramlen-1:0]         CacheData,
   output logic [`PA_BITS-1:0]        CacheAdr,
   output logic [`XLEN-1:0]           CacheTag,
   output logic                       CacheValid,
   output logic                       CacheDirty);

  // shift that converts an SRAM word number into a byte offset
  localparam logsrambytes = $clog2(sramlen/8);

  always_ff @(posedge clk) begin
    if (start) begin
      // address = tag field + line offset + word offset within the line
      CacheAdr   = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << logsrambytes);
      CacheData  = data;
      CacheDirty = dirty;
      CacheValid = valid;
      CacheTag   = tag;
    end
  end
endmodule
// updateProgramAddrLabelArray: look up the addresses of the labels of interest.
// Reads ProgramLabelMapFile and ProgramAddrMapFile in lockstep, one whitespace-delimited
// token from each per iteration; the n-th address token is paired with the n-th label.
// For every label that already exists as a key in ProgramAddrLabelArray, the entry is
// overwritten with the address token parsed as hexadecimal. Labels that are not
// already keys are ignored. If either file cannot be opened the array is left
// unchanged and a warning is printed.
task automatic updateProgramAddrLabelArray;
  input string ProgramAddrMapFile, ProgramLabelMapFile;
  inout integer ProgramAddrLabelArray [string];
  integer ProgramLabelMapFP, ProgramAddrMapFP;
  ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r");
  ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r");
  // $fopen returns 0 on failure; test each descriptor on its own instead of relying
  // on the bitwise AND of two descriptors being non-zero
  if (ProgramLabelMapFP != 0 && ProgramAddrMapFP != 0) begin // check we found both files
    while (!$feof(ProgramLabelMapFP)) begin
      string label, adrstr;
      integer labelcode, adrcode;
      labelcode = $fscanf(ProgramLabelMapFP, "%s\n", label);
      adrcode = $fscanf(ProgramAddrMapFP, "%s\n", adrstr);
      // only use a pair when both reads actually matched one token, so that a short
      // or empty line at the end of either file cannot write a bogus address
      if (labelcode == 1 && adrcode == 1 && ProgramAddrLabelArray.exists(label))
        ProgramAddrLabelArray[label] = adrstr.atohex();
    end
  end else begin
    $warning("updateProgramAddrLabelArray: could not open %s and/or %s; label addresses left unchanged",
             ProgramLabelMapFile, ProgramAddrMapFile);
  end
  // close only the descriptors that were successfully opened
  if (ProgramLabelMapFP != 0) $fclose(ProgramLabelMapFP);
  if (ProgramAddrMapFP != 0) $fclose(ProgramAddrMapFP);
endtask