Merge branch 'openhwgroup:main' into illegal_specific

This commit is contained in:
Kevin Kim 2023-03-07 14:06:22 -08:00 committed by GitHub
commit f29bbe5f69
15 changed files with 247 additions and 61 deletions

3
.gitmodules vendored
View File

@ -21,3 +21,6 @@
[submodule "addins/coremark"]
path = addins/coremark
url = https://github.com/eembc/coremark
[submodule "addins/branch-predictor-simulator"]
path = addins/branch-predictor-simulator
url = https://github.com/synxlin/branch-predictor-simulator.git

@ -0,0 +1 @@
Subproject commit af0c6f8cb62f48ee43e74c21e799102e03951ce2

View File

@ -39,20 +39,20 @@ def ComputeCPI(benchmark):
def ComputeBranchDirMissRate(benchmark):
'Computes and inserts branch direction miss prediction rate.'
(nameString, opt, dataDict) = benchmark
branchDirMissRate = 100.0 * int(dataDict['Br Dir Wrong']) / int(dataDict['Br Count'])
branchDirMissRate = 100.0 * int(dataDict['BP Dir Wrong']) / int(dataDict['Br Count'])
dataDict['BDMR'] = branchDirMissRate
def ComputeBranchTargetMissRate(benchmark):
'Computes and inserts branch target miss prediction rate.'
# *** this is wrong in the verilog test bench
(nameString, opt, dataDict) = benchmark
branchTargetMissRate = 100.0 * int(dataDict['Br Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump, JR, Jal']) + int(dataDict['ret']))
branchTargetMissRate = 100.0 * int(dataDict['BP Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump Not Return']))
dataDict['BTMR'] = branchTargetMissRate
def ComputeRASMissRate(benchmark):
'Computes and inserts return address stack miss prediction rate.'
(nameString, opt, dataDict) = benchmark
RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['ret'])
RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['Return'])
dataDict['RASMPR'] = RASMPR
def ComputeInstrClassMissRate(benchmark):
@ -70,7 +70,9 @@ def ComputeICacheMissRate(benchmark):
def ComputeICacheMissTime(benchmark):
'Computes and inserts instruction class miss prediction rate.'
(nameString, opt, dataDict) = benchmark
ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / int(dataDict['I Cache Miss'])
cycles = int(dataDict['I Cache Miss'])
if(cycles == 0): ICacheMR = 0
else: ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / cycles
dataDict['ICacheMT'] = ICacheMR
def ComputeDCacheMissRate(benchmark):
@ -82,8 +84,10 @@ def ComputeDCacheMissRate(benchmark):
def ComputeDCacheMissTime(benchmark):
'Computes and inserts instruction class miss prediction rate.'
(nameString, opt, dataDict) = benchmark
ICacheMR = 100.0 * int(dataDict['D Cache Cycles']) / int(dataDict['D Cache Miss'])
dataDict['DCacheMT'] = ICacheMR
cycles = int(dataDict['D Cache Miss'])
if(cycles == 0): DCacheMR = 0
else: DCacheMR = 100.0 * int(dataDict['D Cache Cycles']) / cycles
dataDict['DCacheMT'] = DCacheMR
def ComputeAll(benchmarks):
for benchmark in benchmarks:
@ -217,9 +221,9 @@ if(sys.argv[1] == '-b'):
for benchmark in benchmarkAll:
(name, opt, config, dataDict) = benchmark
if name+'_'+opt in benchmarkDict:
benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR']))
benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR']))
else:
benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])]
benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])]
size = len(benchmarkDict)
index = 1

View File

@ -266,7 +266,7 @@ connect_debug_port u_ila_0/probe50 [get_nets [list wallypipelinedsoc/uncore.unco
create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe51]
set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe51]
connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPPredWrongE ]]
connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPWrongE ]]
create_debug_port u_ila_0 probe
set_property port_width 1 [get_debug_ports u_ila_0/probe52]

130
sim/bpred-sim.py Executable file
View File

@ -0,0 +1,130 @@
#!/usr/bin/python3
##################################
#
# regression-wally
# David_Harris@Hmc.edu 25 January 2021
# Modified by Jarred Allen <jaallen@g.hmc.edu>
#
# Run a regression with multiple configurations in parallel and exit with
# non-zero status code if an error happened, as well as printing human-readable
# output.
#
##################################
import sys,os,shutil
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
from collections import namedtuple
regressionDir = os.path.dirname(os.path.abspath(__file__))
os.chdir(regressionDir)
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
# name: the name of this test configuration (used in printing human-readable
# output and picking logfile names)
# cmd: the command to run to test (should include the logfile as '{}', and
# the command needs to write to that file)
# grepstr: the string to grep through the log file for. The test succeeds iff
# grep finds that string in the logfile (is used by grep, so it may
# be any pattern grep accepts, see `man 1 grep` for more info).
# edit this list to add more test cases
configs = [
TestCase(
name="lints",
variant="all",
cmd="./lint-wally | tee {}",
grepstr="All lints run with no errors or warnings"
)
]
bpdSize = [6, 8, 10, 12, 14, 16]
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic']
for CurrBPType in bpdType:
for CurrBPSize in bpdSize:
name = CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize)
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
import os
from multiprocessing import Pool, TimeoutError
def search_log_for_text(text, logfile):
"""Search through the given log file for text, returning True if it is found or False if it is not"""
grepcmd = "grep -e '%s' '%s' > /dev/null" % (text, logfile)
return os.system(grepcmd) == 0
def run_test_case(config):
"""Run the given test case, and return 0 if the test suceeds and 1 if it fails"""
logname = "logs/"+config.variant+"_"+config.name+".log"
cmd = config.cmd.format(logname)
print(cmd)
os.chdir(regressionDir)
os.system(cmd)
if search_log_for_text(config.grepstr, logname):
print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name))
return 0
else:
print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name))
print(" Check %s" % logname)
return 1
def main():
"""Run the tests and count the failures"""
TIMEOUT_DUR = 10800 # 3 hours
global configs
try:
os.chdir(regressionDir)
os.mkdir("logs")
#print(os.getcwd())
#print(regressionDir)
except:
pass
try:
shutil.rmtree("wkdir")
except:
pass
finally:
os.mkdir("wkdir")
if '-makeTests' in sys.argv:
os.chdir(regressionDir)
os.system('./make-tests.sh | tee ./logs/make-tests.log')
# Scale the number of concurrent processes to the number of test cases, but
# max out at a limited number of concurrent processes to not overwhelm the system
with Pool(processes=min(len(configs),40)) as pool:
num_fail = 0
results = {}
for config in configs:
results[config] = pool.apply_async(run_test_case,(config,))
for (config,result) in results.items():
try:
num_fail+=result.get(timeout=TIMEOUT_DUR)
except TimeoutError:
num_fail+=1
print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR))
# Count the number of failures
if num_fail:
print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail)
else:
print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
return num_fail
if __name__ == '__main__':
exit(main())

View File

@ -26,6 +26,14 @@ if {$2 eq "ahb"} {
vdel -lib wkdir/work_${1}_${2}_${3}_${4} -all
}
vlib wkdir/work_${1}_${2}_${3}_${4}
} elseif {$2 eq "configOptions"} {
if [file exists wkdir/work_${1}_${3}_${4}] {
vdel -lib wkdir/work_${1}_${3}_${4} -all
}
vlib wkdir/work_${1}_${3}_${4}
} else {
if [file exists wkdir/work_${1}_${2}] {
vdel -lib wkdir/work_${1}_${2} -all
@ -76,6 +84,30 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
# power add -r /dut/core/*
run -all
# power off -r /dut/core/*
} elseif {$2 eq "configOptions"} {
# set arguments " "
# for {set i 5} {$i <= $argc} {incr i} {
# append arguments "\$$i "
# }
# puts $arguments
# set options eval $arguments
# **** fix this so we can pass any number of +defines.
# only allows 3 right now
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt
vsim -lib wkdir/work_${1}_${3}_${4} testbenchopt -fatal 7 -suppress 3829
# Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
#vsim -coverage -lib work_$2 workopt_$2
# power add generates the logging necessary for said generation.
# power add -r /dut/core/*
run -all
# power off -r /dut/core/*
} else {
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
# start and run simulation

View File

@ -6,6 +6,17 @@ add wave -noupdate /testbench/reset
add wave -noupdate /testbench/reset_ext
add wave -noupdate /testbench/memfilename
add wave -noupdate /testbench/dut/core/SATP_REGW
add wave -noupdate /testbench/FunctionName/FunctionName/PCD
add wave -noupdate /testbench/FunctionName/FunctionName/PCE
add wave -noupdate /testbench/FunctionName/FunctionName/PCF
add wave -noupdate /testbench/FunctionName/FunctionName/PCM
add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp
add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld
add wave -noupdate /testbench/dut/core/InstrValidM
add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr
add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex
add wave -noupdate /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrMapLineCount
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM
add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
@ -55,11 +66,12 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE
add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
@ -622,10 +634,9 @@ add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAcc
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/HPMCSample/FinalHPMCOUNTERH
add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {717301 ns} 0} {{Cursor 5} {394987 ns} 1}
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {49231900 ns} 0} {{Cursor 5} {394987 ns} 1}
quietly wave cursor active 4
configure wave -namecolwidth 250
configure wave -valuecolwidth 194
@ -641,4 +652,4 @@ configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {717254 ns} {717585 ns}
WaveRestoreZoom {49231842 ns} {49231960 ns}

View File

@ -47,7 +47,7 @@ module ifu (
output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU
output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU
output logic [`XLEN-1:0] PCFSpill, // PCF with possible + 2 to handle spill to HPTW
output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW
// Execute
output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address)
input logic PCSrcE, // Executation stage branch is taken
@ -101,7 +101,7 @@ module ifu (
logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4
logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed)
logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed)
logic [`XLEN-1:0] PCNextFSpill; // Next PCF after possible + 2 to handle spill
logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill
logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j)
logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F
logic [`XLEN-1:0] PCD; // Decode stage instruction address
@ -126,7 +126,7 @@ module ifu (
logic CacheableF; // PMA indicates instruction address is cacheable
logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF
logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF
logic BusStall; // Bus interface busy with multicycle operation
logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation
logic GatedStallD; // StallD gated by selected next spill
@ -136,7 +136,7 @@ module ifu (
logic CacheCommittedF; // I$ memory operation started, delay interrupts
logic SelIROM; // PMA indicates instruction address is in the IROM
assign PCFExt = {2'b00, PCFSpill};
assign PCFExt = {2'b00, PCSpillF};
/////////////////////////////////////////////////////////////////////////////////////////////
// Spill Support
@ -144,12 +144,12 @@ module ifu (
if(`C_SUPPORTED) begin : Spill
spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF,
.InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF);
.InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF);
end else begin : NoSpill
assign PCNextFSpill = PCNextF;
assign PCFSpill = PCF;
assign PCSpillNextF = PCNextF;
assign PCSpillF = PCF;
assign PostSpillInstrRawF = InstrRawF;
assign {SelNextSpillF, CompressedF} = 0;
assign {SelSpillNextF, CompressedF} = 0;
end
////////////////////////////////////////////////////////////////////////////////////////////////
@ -213,7 +213,7 @@ module ifu (
logic IROMce;
assign IROMce = ~GatedStallD | reset;
assign IFURWF = 2'b10;
irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF);
irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[`XLEN-1:0]), .IROMInstrF);
end else begin
assign IFURWF = 2'b10;
end
@ -245,7 +245,7 @@ module ifu (
.CacheWriteData('0),
.CacheRW(CacheRWF),
.CacheAtomic('0), .FlushCache('0),
.NextAdr(PCNextFSpill[11:0]),
.NextAdr(PCSpillNextF[11:0]),
.PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW)
@ -286,8 +286,8 @@ module ifu (
end
assign IFUCacheBusStallD = ICacheStallF | BusStall;
assign IFUStallF = IFUCacheBusStallD | SelNextSpillF;
assign GatedStallD = StallD & ~SelNextSpillF;
assign IFUStallF = IFUCacheBusStallD | SelSpillNextF;
assign GatedStallD = StallD & ~SelSpillNextF;
flopenl #(32) AlignedInstrRawDFlop(clk, reset | FlushD, ~StallD, PostSpillInstrRawF, nop, InstrRawD);

View File

@ -43,9 +43,9 @@ module spill #(
input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
input logic ITLBMissF, // ITLB miss, ignore memory request
input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active)
output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill
output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill
output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline
output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill
output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill
output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline
output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
output logic CompressedF); // The fetched instruction is compressed
@ -57,8 +57,8 @@ module spill #(
logic TakeSpillF;
logic SpillF;
logic SelSpillF;
logic SpillSaveF;
logic [15:0] InstrFirstHalf;
logic SpillSaveF;
logic [15:0] InstrFirstHalfF;
////////////////////////////////////////////////////////////////////////////////////////////////////
// PC logic
@ -67,9 +67,9 @@ module spill #(
// compute PCF+2 from the raw PC+4
mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F));
// select between PCNextF and PCF+2
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill));
mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelSpillNextF & ~FlushD), .y(PCSpillNextF));
// select between PCF and PCF+2
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill));
mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF));
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -94,7 +94,7 @@ module spill #(
end
assign SelSpillF = (CurrState == STATE_SPILL);
assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD);
assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD);
assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD;
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -102,10 +102,10 @@ module spill #(
////////////////////////////////////////////////////////////////////////////////////////////////////
// save the first 2 bytes
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf);
flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF);
// merge together
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF);
mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF);
// Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x
always_comb

View File

@ -80,7 +80,7 @@ module lsu (
input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
input logic [1:0] STATUS_MPP, // Machine previous privilege mode
input logic [`XLEN-1:0] PCFSpill, // Fetch PC
input logic [`XLEN-1:0] PCSpillF, // Fetch PC
input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits
output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB
@ -152,7 +152,7 @@ module lsu (
if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED
hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF,
.DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM,
.FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill,
.FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
.ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN
.WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,

View File

@ -34,7 +34,7 @@
module hptw (
input logic clk, reset,
input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table
input logic [`XLEN-1:0] PCFSpill, // addresses to translate
input logic [`XLEN-1:0] PCSpillF, // addresses to translate
input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate
input logic [1:0] MemRWM, AtomicM,
// system status
@ -111,7 +111,7 @@ module hptw (
assign TLBMiss = (DTLBMissOrUpdateDAM | ITLBMissOrUpdateDAF);
// Determine which address to translate
mux2 #(`XLEN) vadrmux(PCFSpill, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr);
mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr);
assign CurrentPPN = PTE[`PPN_BITS+9:10];
// State flops

View File

@ -63,7 +63,7 @@ module wallypipelinedcore (
logic [2:0] Funct3E;
logic [31:0] InstrD;
logic [31:0] InstrM;
logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE;
logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE;
logic [`XLEN-1:0] PCM;
logic [`XLEN-1:0] CSRReadValW, MDUResultW;
logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF;
@ -170,7 +170,7 @@ module wallypipelinedcore (
.InstrValidM, .InstrValidE, .InstrValidD,
.BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF,
// Fetch
.HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF,
.HRDATA, .PCSpillF, .IFUHADDR, .PC2NextF,
.IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE,
.ICacheAccess, .ICacheMiss,
// Execute
@ -241,7 +241,7 @@ module wallypipelinedcore (
.StoreAmoMisalignedFaultM, // connects to privilege
.StoreAmoAccessFaultM, // connects to privilege
.InstrUpdateDAF,
.PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW,
.PCSpillF, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW,
.LSUStallM);
if(`BUS_SUPPORTED) begin : ebu

View File

@ -35,22 +35,29 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile);
string FunctionName;
logic [`XLEN-1:0] PCF, PCD, PCE, FunctionAddr;
logic StallD, StallE, FlushD, FlushE;
logic [`XLEN-1:0] PCF, PCD, PCE, PCM, FunctionAddr, PCM_temp, PCMOld;
logic StallD, StallE, StallM, FlushD, FlushE, FlushM;
logic InstrValidM;
integer ProgramAddrIndex, ProgramAddrIndexQ;
assign PCF = testbench.dut.core.ifu.PCF;
assign StallD = testbench.dut.core.StallD;
assign StallE = testbench.dut.core.StallE;
assign StallM = testbench.dut.core.StallM;
assign FlushD = testbench.dut.core.FlushD;
assign FlushE = testbench.dut.core.FlushE;
assign FlushM = testbench.dut.core.FlushM;
assign InstrValidM = testbench.dut.core.InstrValidM;
// copy from ifu
// when the F and D stages are flushed we need to ensure the PCE is held so that the function name does not
// erroneously change.
flopenrc #(`XLEN) PCDReg(clk, reset, 1'b0, ~StallD, FlushE & FlushD ? PCE : PCF, PCD);
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushE ? PCE : PCD, PCE);
// also need to hold the old value not an erroneously fetched PC.
flopenr #(`XLEN) PCDReg(clk, reset, ~StallD, FlushD ? PCE : PCF, PCD);
flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushD & FlushE ? PCF : FlushE ? PCE : PCD, PCE);
flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, FlushD & FlushE & FlushM ? PCF : FlushE & FlushM ? PCE : FlushM ? PCM : PCE, PCM_temp);
flopenr #(`XLEN) PCMOldReg(clk, reset, InstrValidM, PCM_temp, PCMOld);
assign PCM = InstrValidM ? PCM_temp : PCMOld;
task automatic bin_search_min;
@ -111,7 +118,11 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile);
// preload
// initial begin
always @ (posedge reset) begin
always @ (negedge reset) begin
// clear out the old mapping between programs.
foreach(ProgramAddrMapMemory[i]) ProgramAddrMapMemory.delete(i);
foreach(ProgramLabelMapMemory[i]) ProgramLabelMapMemory.delete(i);
$readmemh(ProgramAddrMapFile, ProgramAddrMapMemory);
// we need to count the number of lines in the file so we can set FunctionRadixLineCount.
@ -147,11 +158,11 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile);
$display("Cannot open file %s for reading.", ProgramLabelMapFile);
end
$fclose(ProgramLabelMapFP);
end
always @(PCE) begin
bin_search_min(PCE, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex);
always @(PCM) begin
bin_search_min(PCM, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex);
end
logic OrReducedAdr, AnyUnknown;

View File

@ -415,7 +415,6 @@ logic [3:0] dummy;
logic StartSample;
logic EndSample, EndSampleFirst, EndSampleDelayed;
logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0];
logic [`XLEN-1:0] FinalHPMCOUNTERH[`COUNTERS-1:0];
string HPMCnames[] = '{"Mcycle",
"------",
@ -471,11 +470,6 @@ logic [3:0] dummy;
InitialHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex];
end
end
if(EndSample) begin
for(HPMCindex = 0; HPMCindex < 32; HPMCindex += 1) begin
FinalHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex];
end
end
if(EndSample) begin
for(HPMCindex = 0; HPMCindex < HPMCnames.size(); HPMCindex += 1) begin
// unlikely to have more than 10M in any counter.