From a3a45f696f6ac74a901d42c08231f2dc7d4f97a1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 17:49:44 -0600 Subject: [PATCH 01/12] Fixed a bunch of odd bugs with the test bench preventing correct measurement of performance counters. --- sim/wave.do | 27 +++++++++++++++++++-------- testbench/common/functionName.sv | 30 ++++++++++++++++++++++-------- testbench/testbench.sv | 6 ------ 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/sim/wave.do b/sim/wave.do index d9e8869ad..fd95f6f17 100644 --- a/sim/wave.do +++ b/sim/wave.do @@ -6,6 +6,17 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/reset_ext add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/core/SATP_REGW +add wave -noupdate /testbench/FunctionName/FunctionName/PCD +add wave -noupdate /testbench/FunctionName/FunctionName/PCE +add wave -noupdate /testbench/FunctionName/FunctionName/PCF +add wave -noupdate /testbench/FunctionName/FunctionName/PCM +add wave -noupdate /testbench/FunctionName/FunctionName/PCM_temp +add wave -noupdate /testbench/FunctionName/FunctionName/PCMOld +add wave -noupdate /testbench/dut/core/InstrValidM +add wave -noupdate /testbench/FunctionName/FunctionName/FunctionAddr +add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrIndex +add wave -noupdate /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate /testbench/FunctionName/FunctionName/ProgramAddrMapLineCount add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM add wave -noupdate -group HDU -expand -group hazards -color Pink /testbench/dut/core/hzu/TrapM add wave -noupdate -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD @@ -55,11 +66,12 @@ add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D add wave -noupdate 
-group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/PCE -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ifu/InstrE -add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE -add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE +add wave -noupdate -group {Execution Stage} /testbench/InstrEName +add wave -noupdate -group {Execution Stage} /testbench/dut/core/ieu/c/InstrValidE +add wave -noupdate -expand -group {Memory Stage} /testbench/FunctionName/FunctionName/FunctionName +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrValidM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName @@ -622,10 +634,9 @@ add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/ICacheAcc add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMiss add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM add wave -noupdate /testbench/clk -add wave -noupdate /testbench/HPMCSample/FinalHPMCOUNTERH add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {717301 ns} 0} {{Cursor 5} {394987 ns} 1} +WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {49231900 ns} 0} {{Cursor 5} {394987 ns} 1} quietly wave cursor active 4 configure 
wave -namecolwidth 250 configure wave -valuecolwidth 194 @@ -641,4 +652,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {717254 ns} {717585 ns} +WaveRestoreZoom {49231842 ns} {49231960 ns} diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index 17b27ec5d..2c658a264 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -35,22 +35,29 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); string FunctionName; - logic [`XLEN-1:0] PCF, PCD, PCE, FunctionAddr; - logic StallD, StallE, FlushD, FlushE; + logic [`XLEN-1:0] PCF, PCD, PCE, PCM, FunctionAddr, PCM_temp, PCMOld; + logic StallD, StallE, StallM, FlushD, FlushE, FlushM; + logic InstrValidM; integer ProgramAddrIndex, ProgramAddrIndexQ; assign PCF = testbench.dut.core.ifu.PCF; assign StallD = testbench.dut.core.StallD; assign StallE = testbench.dut.core.StallE; + assign StallM = testbench.dut.core.StallM; assign FlushD = testbench.dut.core.FlushD; assign FlushE = testbench.dut.core.FlushE; + assign FlushM = testbench.dut.core.FlushM; + assign InstrValidM = testbench.dut.core.InstrValidM; // copy from ifu // when the F and D stages are flushed we need to ensure the PCE is held so that the function name does not // erroneously change. - flopenrc #(`XLEN) PCDReg(clk, reset, 1'b0, ~StallD, FlushE & FlushD ? PCE : PCF, PCD); - flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushE ? PCE : PCD, PCE); - + // also need to hold the old value not an erroneously fetched PC. + flopenr #(`XLEN) PCDReg(clk, reset, ~StallD, FlushD ? PCE : PCF, PCD); + flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, FlushD & FlushE ? PCF : FlushE ? PCE : PCD, PCE); + flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, FlushD & FlushE & FlushM ? PCF : FlushE & FlushM ? PCE : FlushM ? PCM : PCE, PCM_temp); + flopenr #(`XLEN) PCMOldReg(clk, reset, InstrValidM, PCM_temp, PCMOld); + assign PCM = InstrValidM ? 
PCM_temp : PCMOld; task automatic bin_search_min; @@ -111,7 +118,11 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); // preload // initial begin - always @ (posedge reset) begin + always @ (negedge reset) begin + // clear out the old mapping between programs. + foreach(ProgramAddrMapMemory[i]) ProgramAddrMapMemory.delete(i); + foreach(ProgramLabelMapMemory[i]) ProgramLabelMapMemory.delete(i); + $readmemh(ProgramAddrMapFile, ProgramAddrMapMemory); // we need to count the number of lines in the file so we can set FunctionRadixLineCount. @@ -147,11 +158,14 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); $display("Cannot open file %s for reading.", ProgramLabelMapFile); end $fclose(ProgramLabelMapFP); + + foreach(ProgramAddrMapMemory[i]) $display("%x", ProgramAddrMapMemory[i]); + foreach(ProgramLabelMapMemory[i]) $display("%s", ProgramLabelMapMemory[i]); end - always @(PCE) begin - bin_search_min(PCE, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex); + always @(PCM) begin + bin_search_min(PCM, ProgramAddrMapLineCount, ProgramAddrMapMemory, FunctionAddr, ProgramAddrIndex); end logic OrReducedAdr, AnyUnknown; diff --git a/testbench/testbench.sv b/testbench/testbench.sv index e6f025e83..c4581fa8b 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -408,7 +408,6 @@ logic [3:0] dummy; logic StartSample; logic EndSample, EndSampleFirst, EndSampleDelayed; logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0]; - logic [`XLEN-1:0] FinalHPMCOUNTERH[`COUNTERS-1:0]; string HPMCnames[] = '{"Mcycle", "------", @@ -464,11 +463,6 @@ logic [3:0] dummy; InitialHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; end end - if(EndSample) begin - for(HPMCindex = 0; HPMCindex < 32; HPMCindex += 1) begin - FinalHPMCOUNTERH[HPMCindex] <= dut.core.priv.priv.csr.counters.counters.HPMCOUNTER_REGW[HPMCindex]; - end - end if(EndSample) begin for(HPMCindex = 
0; HPMCindex < HPMCnames.size(); HPMCindex += 1) begin // unlikely to have more than 10M in any counter. From f07f331f7258467535fb683449372f16a591b64b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 3 Mar 2023 17:52:00 -0600 Subject: [PATCH 02/12] Removed debugging code. --- testbench/common/functionName.sv | 3 --- 1 file changed, 3 deletions(-) diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index 2c658a264..c986c2e51 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -159,9 +159,6 @@ module FunctionName(reset, clk, ProgramAddrMapFile, ProgramLabelMapFile); end $fclose(ProgramLabelMapFP); - foreach(ProgramAddrMapMemory[i]) $display("%x", ProgramAddrMapMemory[i]); - foreach(ProgramLabelMapMemory[i]) $display("%s", ProgramLabelMapMemory[i]); - end always @(PCM) begin From 93f2bacdae58a13bcc6e6c1f2b54ca8da1a09ba7 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 13:45:15 -0600 Subject: [PATCH 03/12] Updated parsing script. --- bin/parseHPMC.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index 1ce084fc3..dcac5182a 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -39,20 +39,20 @@ def ComputeCPI(benchmark): def ComputeBranchDirMissRate(benchmark): 'Computes and inserts branch direction miss prediction rate.' (nameString, opt, dataDict) = benchmark - branchDirMissRate = 100.0 * int(dataDict['Br Dir Wrong']) / int(dataDict['Br Count']) + branchDirMissRate = 100.0 * int(dataDict['BP Dir Wrong']) / int(dataDict['Br Count']) dataDict['BDMR'] = branchDirMissRate def ComputeBranchTargetMissRate(benchmark): 'Computes and inserts branch target miss prediction rate.' 
# *** this is wrong in the verilog test bench (nameString, opt, dataDict) = benchmark - branchTargetMissRate = 100.0 * int(dataDict['Br Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump, JR, Jal']) + int(dataDict['ret'])) + branchTargetMissRate = 100.0 * int(dataDict['BP Target Wrong']) / (int(dataDict['Br Count']) + int(dataDict['Jump Not Return'])) dataDict['BTMR'] = branchTargetMissRate def ComputeRASMissRate(benchmark): 'Computes and inserts return address stack miss prediction rate.' (nameString, opt, dataDict) = benchmark - RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['ret']) + RASMPR = 100.0 * int(dataDict['RAS Wrong']) / int(dataDict['Return']) dataDict['RASMPR'] = RASMPR def ComputeInstrClassMissRate(benchmark): @@ -70,7 +70,9 @@ def ComputeICacheMissRate(benchmark): def ComputeICacheMissTime(benchmark): 'Computes and inserts instruction class miss prediction rate.' (nameString, opt, dataDict) = benchmark - ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / int(dataDict['I Cache Miss']) + cycles = int(dataDict['I Cache Miss']) + if(cycles == 0): ICacheMR = 0 + else: ICacheMR = 100.0 * int(dataDict['I Cache Cycles']) / cycles dataDict['ICacheMT'] = ICacheMR def ComputeDCacheMissRate(benchmark): @@ -82,8 +84,10 @@ def ComputeDCacheMissRate(benchmark): def ComputeDCacheMissTime(benchmark): 'Computes and inserts instruction class miss prediction rate.' 
(nameString, opt, dataDict) = benchmark - ICacheMR = 100.0 * int(dataDict['D Cache Cycles']) / int(dataDict['D Cache Miss']) - dataDict['DCacheMT'] = ICacheMR + cycles = int(dataDict['D Cache Miss']) + if(cycles == 0): DCacheMR = 0 + else: DCacheMR = 100.0 * int(dataDict['D Cache Cycles']) / cycles + dataDict['DCacheMT'] = DCacheMR def ComputeAll(benchmarks): for benchmark in benchmarks: From e9fa2344108b0410e326e8f627583458ad6c7c76 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 17:10:58 -0600 Subject: [PATCH 04/12] Partial automation of branch predictor embenching. --- bin/parseHPMC.py | 4 +- sim/bpred-sim.py | 143 +++++++++++++++++++++++++++++++++++++++++++++ sim/wally-batch.do | 32 ++++++++++ 3 files changed, 177 insertions(+), 2 deletions(-) create mode 100755 sim/bpred-sim.py diff --git a/bin/parseHPMC.py b/bin/parseHPMC.py index dcac5182a..3229d1c74 100755 --- a/bin/parseHPMC.py +++ b/bin/parseHPMC.py @@ -221,9 +221,9 @@ if(sys.argv[1] == '-b'): for benchmark in benchmarkAll: (name, opt, config, dataDict) = benchmark if name+'_'+opt in benchmarkDict: - benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR'])) + benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR'])) else: - benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])] + benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])] size = len(benchmarkDict) index = 1 diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py new file mode 100755 index 000000000..3c5aa2d5a --- /dev/null +++ b/sim/bpred-sim.py @@ -0,0 +1,143 @@ +#!/usr/bin/python3 +################################## +# +# regression-wally +# David_Harris@Hmc.edu 25 January 2021 +# Modified by Jarred Allen +# +# Run a regression with multiple configurations in parallel and exit with +# non-zero status code if an error happened, as well as printing human-readable +# output. 
+# +################################## +import sys,os,shutil + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +from collections import namedtuple +regressionDir = os.path.dirname(os.path.abspath(__file__)) +os.chdir(regressionDir) + +TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) +# name: the name of this test configuration (used in printing human-readable +# output and picking logfile names) +# cmd: the command to run to test (should include the logfile as '{}', and +# the command needs to write to that file) +# grepstr: the string to grep through the log file for. The test succeeds iff +# grep finds that string in the logfile (is used by grep, so it may +# be any pattern grep accepts, see `man 1 grep` for more info). + +# edit this list to add more test cases +configs = [ + TestCase( + name="lints", + variant="all", + cmd="./lint-wally | tee {}", + grepstr="All lints run with no errors or warnings" + ) +] + +configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_TWOBIT\" +define+BPRED_SIZE=6" +tc = TestCase( + name="twobit6", + variant="rv32gc", + cmd="vsim > {} -c < {} -c < {} -c < /dev/null" % (text, logfile) + return os.system(grepcmd) == 0 + +def run_test_case(config): + """Run the given test case, and return 0 if the test succeeds and 1 if it fails""" + logname = "logs/"+config.variant+"_"+config.name+".log" + cmd = config.cmd.format(logname) + print(cmd) + os.chdir(regressionDir) + os.system(cmd) + if search_log_for_text(config.grepstr, logname): + print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name)) + return 0 + else: + print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name)) + print(" Check %s" % logname) + return 1 + +def main(): + """Run the tests and count the failures"""
+ TIMEOUT_DUR = 10800 # 3 hours + + global configs + try: + os.chdir(regressionDir) + os.mkdir("logs") + #print(os.getcwd()) + #print(regressionDir) + except: + pass + try: + shutil.rmtree("wkdir") + except: + pass + finally: + os.mkdir("wkdir") + + if '-makeTests' in sys.argv: + os.chdir(regressionDir) + os.system('./make-tests.sh | tee ./logs/make-tests.log') + + # Scale the number of concurrent processes to the number of test cases, but + # max out at a limited number of concurrent processes to not overwhelm the system + with Pool(processes=min(len(configs),40)) as pool: + num_fail = 0 + results = {} + for config in configs: + results[config] = pool.apply_async(run_test_case,(config,)) + for (config,result) in results.items(): + try: + num_fail+=result.get(timeout=TIMEOUT_DUR) + except TimeoutError: + num_fail+=1 + print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR)) + + # Count the number of failures + if num_fail: + print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail) + else: + print(f"{bcolors.OKGREEN}SUCCESS! 
All tests ran without failures{bcolors.ENDC}") + return num_fail + +if __name__ == '__main__': + exit(main()) diff --git a/sim/wally-batch.do b/sim/wally-batch.do index 7e63de8ad..0f03f9032 100644 --- a/sim/wally-batch.do +++ b/sim/wally-batch.do @@ -26,6 +26,14 @@ if {$2 eq "ahb"} { vdel -lib wkdir/work_${1}_${2}_${3}_${4} -all } vlib wkdir/work_${1}_${2}_${3}_${4} + + +} elseif {$2 eq "configOptions"} { + if [file exists wkdir/work_${1}_${3}_${4}] { + vdel -lib wkdir/work_${1}_${3}_${4} -all + } + vlib wkdir/work_${1}_${3}_${4} + } else { if [file exists wkdir/work_${1}_${2}] { vdel -lib wkdir/work_${1}_${2} -all @@ -76,6 +84,30 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { # power add -r /dut/core/* run -all # power off -r /dut/core/* + +} elseif {$2 eq "configOptions"} { + # set arguments " " + # for {set i 5} {$i <= $argc} {incr i} { + # append arguments "\$$i " + # } + # puts $arguments + # set options eval $arguments + # **** fix this so we can pass any number of +defines. + # only allows 3 right now + + vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7 + # start and run simulation + # remove +acc flag for faster sim during regressions if there is no need to access internal signals + vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt + vsim -lib wkdir/work_${1}_${3}_${4} testbenchopt -fatal 7 -suppress 3829 + # Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time + #vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf + #vsim -coverage -lib work_$2 workopt_$2 + # power add generates the logging necessary for said generation. 
+ # power add -r /dut/core/* + run -all + # power off -r /dut/core/* + } else { vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 # start and run simulation From 6766ecc28eb4bb56f1ac20ea091d82e443efcbb7 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sat, 4 Mar 2023 17:20:45 -0600 Subject: [PATCH 05/12] Mostly working bpred launch script. --- sim/bpred-sim.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 3c5aa2d5a..1ec8b3045 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -46,31 +46,18 @@ configs = [ ) ] -configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_TWOBIT\" +define+BPRED_SIZE=6" -tc = TestCase( - name="twobit6", - variant="rv32gc", - cmd="vsim > {} -c < {} -c < {} -c < {} -c < Date: Sat, 4 Mar 2023 17:59:16 -0600 Subject: [PATCH 06/12] Working batch mode branch prediction simulations. --- sim/bpred-sim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/bpred-sim.py b/sim/bpred-sim.py index 1ec8b3045..60574e371 100755 --- a/sim/bpred-sim.py +++ b/sim/bpred-sim.py @@ -47,7 +47,7 @@ configs = [ ] bpdSize = [6, 8, 10, 12, 14, 16] -bpdType = ['twobit', 'gshare'] +bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic'] for CurrBPType in bpdType: for CurrBPSize in bpdSize: name = CurrBPType+str(CurrBPSize) From be0318209e71579227943af980fe61c88e721109 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 13:14:48 -0600 Subject: [PATCH 07/12] Updated fpga ila script. 
--- fpga/constraints/debug2.xdc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index 7ea5bf6bd..8c1e0947a 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -266,7 +266,7 @@ connect_debug_port u_ila_0/probe50 [get_nets [list wallypipelinedsoc/uncore.unco create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe51] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe51] -connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPPredWrongE ]] +connect_debug_port u_ila_0/probe51 [get_nets [list wallypipelinedsoc/core/hzu/BPWrongE ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe52] From fdfb80a818d3dfd8c103190d9df082beeb78e2b6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:47:55 -0600 Subject: [PATCH 08/12] Renamed ebuarbfsm to ebufsmarb to match figures. --- src/ebu/{ebuarbfsm.sv => ebufsmarb.sv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/ebu/{ebuarbfsm.sv => ebufsmarb.sv} (100%) diff --git a/src/ebu/ebuarbfsm.sv b/src/ebu/ebufsmarb.sv similarity index 100% rename from src/ebu/ebuarbfsm.sv rename to src/ebu/ebufsmarb.sv From 473ed2b475e95cb3e6479d5183567ab936d77f01 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:48:57 -0600 Subject: [PATCH 09/12] Renamed InstrFirstHalf to InstrFirstHalfF. 
--- src/ifu/spill.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4b89a3cef..4d971cbae 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -58,7 +58,7 @@ module spill #( logic SpillF; logic SelSpillF; logic SpillSaveF; - logic [15:0] InstrFirstHalf; + logic [15:0] InstrFirstHalfF; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic @@ -102,10 +102,10 @@ module spill #( //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf); + flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); // merge together - mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF); + mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x always_comb From 31fcc0daf788bde48c1e7c6676f7962e49575e90 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 17:50:57 -0600 Subject: [PATCH 10/12] Renamed PCFSpill to PCSpillF. 
--- src/ifu/ifu.sv | 8 ++++---- src/ifu/spill.sv | 4 ++-- src/lsu/lsu.sv | 4 ++-- src/mmu/hptw.sv | 4 ++-- src/wally/wallypipelinedcore.sv | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 2c2ee7b4e..fa3eab16f 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -47,7 +47,7 @@ module ifu ( output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU - output logic [`XLEN-1:0] PCFSpill, // PCF with possible + 2 to handle spill to HPTW + output logic [`XLEN-1:0] PCSpillF, // PCF with possible + 2 to handle spill to HPTW // Execute output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic PCSrcE, // Executation stage branch is taken @@ -136,7 +136,7 @@ module ifu ( logic CacheCommittedF; // I$ memory operation started, delay interrupts logic SelIROM; // PMA indicates instruction address is in the IROM - assign PCFExt = {2'b00, PCFSpill}; + assign PCFExt = {2'b00, PCSpillF}; ///////////////////////////////////////////////////////////////////////////////////////////// // Spill Support @@ -144,10 +144,10 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCSpillF, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill assign PCNextFSpill = PCNextF; - assign PCFSpill = PCF; + assign PCSpillF = PCF; assign PostSpillInstrRawF = InstrRawF; assign {SelNextSpillF, CompressedF} = 0; end diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index 4d971cbae..d4e85faf0 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -44,7 +44,7 @@ module spill #( input logic ITLBMissF, // 
ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill - output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill + output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic CompressedF); // The fetched instruction is compressed @@ -69,7 +69,7 @@ module spill #( // select between PCNextF and PCF+2 mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill)); // select between PCF and PCF+2 - mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); + mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9f11f7007..91ad694e9 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -80,7 +80,7 @@ module lsu ( input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege input logic [1:0] STATUS_MPP, // Machine previous privilege mode - input logic [`XLEN-1:0] PCFSpill, // Fetch PC + input logic [`XLEN-1:0] PCSpillF, // Fetch PC input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk input logic InstrUpdateDAF, // ITLB hit needs to update dirty or access bits output logic [`XLEN-1:0] PTE, // Page table entry 
write to ITLB @@ -152,7 +152,7 @@ module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrUpdateDAF, .DataUpdateDAM, - .FlushW, .DCacheStallM, .SATP_REGW, .PCFSpill, + .FlushW, .DCacheStallM, .SATP_REGW, .PCSpillF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index f2df8ea92..aac0a606c 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -34,7 +34,7 @@ module hptw ( input logic clk, reset, input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCFSpill, // addresses to translate + input logic [`XLEN-1:0] PCSpillF, // addresses to translate input logic [`XLEN+1:0] IEUAdrExtM, // addresses to translate input logic [1:0] MemRWM, AtomicM, // system status @@ -111,7 +111,7 @@ module hptw ( assign TLBMiss = (DTLBMissOrDAFaultM | ITLBMissOrDAFaultF); // Determine which address to translate - mux2 #(`XLEN) vadrmux(PCFSpill, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr); + mux2 #(`XLEN) vadrmux(PCSpillF, IEUAdrExtM[`XLEN-1:0], DTLBWalk, TranslationVAdr); assign CurrentPPN = PTE[`PPN_BITS+9:10]; // State flops diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 6c2d5816b..a226c7839 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -63,7 +63,7 @@ module wallypipelinedcore ( logic [2:0] Funct3E; logic [31:0] InstrD; logic [31:0] InstrM; - logic [`XLEN-1:0] PCFSpill, PCE, PCLinkE; + logic [`XLEN-1:0] PCSpillF, PCE, PCLinkE; logic [`XLEN-1:0] PCM; logic [`XLEN-1:0] CSRReadValW, MDUResultW; logic [`XLEN-1:0] UnalignedPCNextF, PC2NextF; @@ -170,7 +170,7 @@ module wallypipelinedcore ( .InstrValidM, .InstrValidE, .InstrValidD, 
.BranchD, .BranchE, .JumpD, .JumpE, .ICacheStallF, // Fetch - .HRDATA, .PCFSpill, .IFUHADDR, .PC2NextF, + .HRDATA, .PCSpillF, .IFUHADDR, .PC2NextF, .IFUStallF, .IFUHBURST, .IFUHTRANS, .IFUHSIZE, .IFUHREADY, .IFUHWRITE, .ICacheAccess, .ICacheMiss, // Execute @@ -241,7 +241,7 @@ module wallypipelinedcore ( .StoreAmoMisalignedFaultM, // connects to privilege .StoreAmoAccessFaultM, // connects to privilege .InstrUpdateDAF, - .PCFSpill, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, + .PCSpillF, .ITLBMissF, .PTE, .PageType, .ITLBWriteF, .SelHPTW, .LSUStallM); if(`BUS_SUPPORTED) begin : ebu From a6b851a672cc897ff03ca19d4ca7ce71530a00c3 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 6 Mar 2023 18:29:21 -0600 Subject: [PATCH 11/12] Renamed signals to be consistent with textbook. --- src/ifu/ifu.sv | 18 +++++++++--------- src/ifu/spill.sv | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index fa3eab16f..41b0de00e 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -101,7 +101,7 @@ module ifu ( logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) - logic [`XLEN-1:0] PCNextFSpill; // Next PCF after possible + 2 to handle spill + logic [`XLEN-1:0] PCSpillNextF; // Next PCF after possible + 2 to handle spill logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. 
Fancy way to compute PCPlus2or4F logic [`XLEN-1:0] PCD; // Decode stage instruction address @@ -126,7 +126,7 @@ module ifu ( logic CacheableF; // PMA indicates instruction address is cacheable - logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF + logic SelSpillNextF; // In a spill, stall pipeline and gate local stallF logic BusStall; // Bus interface busy with multicycle operation logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation logic GatedStallD; // StallD gated by selected next spill @@ -144,12 +144,12 @@ module ifu ( if(`C_SUPPORTED) begin : Spill spill #(`ICACHE_SUPPORTED) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, - .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCSpillF, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + .InstrUpdateDAF, .IFUCacheBusStallD, .ITLBMissF, .PCSpillNextF, .PCSpillF, .SelSpillNextF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpill - assign PCNextFSpill = PCNextF; + assign PCSpillNextF = PCNextF; assign PCSpillF = PCF; assign PostSpillInstrRawF = InstrRawF; - assign {SelNextSpillF, CompressedF} = 0; + assign {SelSpillNextF, CompressedF} = 0; end //////////////////////////////////////////////////////////////////////////////////////////////// @@ -213,7 +213,7 @@ module ifu ( logic IROMce; assign IROMce = ~GatedStallD | reset; assign IFURWF = 2'b10; - irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF); + irom irom(.clk, .ce(IROMce), .Adr(PCSpillNextF[`XLEN-1:0]), .IROMInstrF); end else begin assign IFURWF = 2'b10; end @@ -245,7 +245,7 @@ module ifu ( .CacheWriteData('0), .CacheRW(CacheRWF), .CacheAtomic('0), .FlushCache('0), - .NextAdr(PCNextFSpill[11:0]), + .NextAdr(PCSpillNextF[11:0]), .PAdr(PCPF), .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW) @@ -286,8 +286,8 @@ module ifu ( end assign IFUCacheBusStallD = 
ICacheStallF | BusStall; - assign IFUStallF = IFUCacheBusStallD | SelNextSpillF; - assign GatedStallD = StallD & ~SelNextSpillF; + assign IFUStallF = IFUCacheBusStallD | SelSpillNextF; + assign GatedStallD = StallD & ~SelSpillNextF; flopenl #(32) AlignedInstrRawDFlop(clk, reset | FlushD, ~StallD, PostSpillInstrRawF, nop, InstrRawD); diff --git a/src/ifu/spill.sv b/src/ifu/spill.sv index d4e85faf0..73f302ca9 100644 --- a/src/ifu/spill.sv +++ b/src/ifu/spill.sv @@ -43,9 +43,9 @@ module spill #( input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic ITLBMissF, // ITLB miss, ignore memory request input logic InstrUpdateDAF, // Ignore memory request if the hptw support write and a DA page fault occurs (hptw is still active) - output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill + output logic [`XLEN-1:0] PCSpillNextF, // The next PCF for one of the two memory addresses of the spill output logic [`XLEN-1:0] PCSpillF, // PCF for one of the two memory addresses of the spill - output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline + output logic SelSpillNextF, // During the transition between the two spill operations, the IFU should stall the pipeline output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic CompressedF); // The fetched instruction is compressed @@ -57,7 +57,7 @@ module spill #( logic TakeSpillF; logic SpillF; logic SelSpillF; - logic SpillSaveF; + logic SpillSaveF; logic [15:0] InstrFirstHalfF; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -67,7 +67,7 @@ module spill #( // compute PCF+2 from the raw PC+4 mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); // select between 
PCNextF and PCF+2 - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill)); + mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelSpillNextF & ~FlushD), .y(PCSpillNextF)); // select between PCF and PCF+2 mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCSpillF)); @@ -94,7 +94,7 @@ module spill #( end assign SelSpillF = (CurrState == STATE_SPILL); - assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); + assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD; //////////////////////////////////////////////////////////////////////////////////////////////////// From f067935eed09e053fa9d7f75e260382df1b33aae Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 7 Mar 2023 10:49:59 -0600 Subject: [PATCH 12/12] Added Yujun Lin's branch predictor simulator. This is a C baseline module for common branch predictor algorithms. --- .gitmodules | 3 +++ addins/branch-predictor-simulator | 1 + 2 files changed, 4 insertions(+) create mode 160000 addins/branch-predictor-simulator diff --git a/.gitmodules b/.gitmodules index ab45d3f96..78243a1de 100644 --- a/.gitmodules +++ b/.gitmodules @@ -21,3 +21,6 @@ [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark +[submodule "addins/branch-predictor-simulator"] + path = addins/branch-predictor-simulator + url = https://github.com/synxlin/branch-predictor-simulator.git diff --git a/addins/branch-predictor-simulator b/addins/branch-predictor-simulator new file mode 160000 index 000000000..af0c6f8cb --- /dev/null +++ b/addins/branch-predictor-simulator @@ -0,0 +1 @@ +Subproject commit af0c6f8cb62f48ee43e74c21e799102e03951ce2