Merge pull request #615 from ross144/main

Updates for branch predictor configs and nightly regression. Correctly automated this time.
This commit is contained in:
David Harris 2024-02-04 19:02:42 -08:00 committed by GitHub
commit aa1fe30aae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 130 additions and 59 deletions

View File

@ -285,27 +285,30 @@ RAS_SIZE 32'd4
deriv bpred_GSHARE_10_6_10_1_rv32gc rv32gc
RAS_SIZE 32'd6
deriv bpred_GSHARE_10_2_10_1_rv32gc rv32gc
deriv bpred_GSHARE_10_10_10_1_rv32gc rv32gc
RAS_SIZE 32'd10
deriv bpred_GSHARE_10_16_10_1_rv32gc rv32gc
RAS_SIZE 32'd16
deriv bpred_GSHARE_10_2_6_1_rv32gc rv32gc
deriv bpred_GSHARE_10_16_6_1_rv32gc rv32gc
BTB_SIZE 32'd6
deriv bpred_GSHARE_10_2_8_1_rv32gc rv32gc
deriv bpred_GSHARE_10_16_8_1_rv32gc rv32gc
BTB_SIZE 32'd8
deriv bpred_GSHARE_10_2_12_1_rv32gc rv32gc
deriv bpred_GSHARE_10_16_12_1_rv32gc rv32gc
BTB_SIZE 32'd12
deriv bpred_GSHARE_10_2_14_1_rv32gc rv32gc
deriv bpred_GSHARE_10_16_14_1_rv32gc rv32gc
BTB_SIZE 32'd14
deriv bpred_GSHARE_10_2_16_1_rv32gc rv32gc
deriv bpred_GSHARE_10_16_16_1_rv32gc rv32gc
BTB_SIZE 32'd16
deriv bpred_GSHARE_6_16_10_0_rv32gc rv32gc bpred_GSHARE_6_16_10_1_rv32gc
INSTR_CLASS_PRED 0
@ -354,25 +357,25 @@ INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_6_10_0_rv32gc rv32gc bpred_GSHARE_10_6_10_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_10_0_rv32gc rv32gc bpred_GSHARE_10_2_10_1_rv32gc
deriv bpred_GSHARE_10_10_10_0_rv32gc rv32gc bpred_GSHARE_10_10_10_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_16_10_0_rv32gc rv32gc bpred_GSHARE_10_16_10_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_6_0_rv32gc rv32gc bpred_GSHARE_10_2_6_1_rv32gc
deriv bpred_GSHARE_10_16_6_0_rv32gc rv32gc bpred_GSHARE_10_16_6_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_8_0_rv32gc rv32gc bpred_GSHARE_10_2_8_1_rv32gc
deriv bpred_GSHARE_10_16_8_0_rv32gc rv32gc bpred_GSHARE_10_16_8_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_12_0_rv32gc rv32gc bpred_GSHARE_10_2_12_1_rv32gc
deriv bpred_GSHARE_10_16_12_0_rv32gc rv32gc bpred_GSHARE_10_16_12_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_14_0_rv32gc rv32gc bpred_GSHARE_10_2_14_1_rv32gc
deriv bpred_GSHARE_10_16_14_0_rv32gc rv32gc bpred_GSHARE_10_16_14_1_rv32gc
INSTR_CLASS_PRED 0
deriv bpred_GSHARE_10_2_16_0_rv32gc rv32gc bpred_GSHARE_10_2_16_1_rv32gc
deriv bpred_GSHARE_10_16_16_0_rv32gc rv32gc bpred_GSHARE_10_16_16_1_rv32gc
INSTR_CLASS_PRED 0
# Cache configurations

View File

@ -1,12 +1,12 @@
../logs/rv32gc_gshare6.log gshare 6
../logs/rv32gc_gshare8.log gshare 8
../logs/rv32gc_gshare10.log gshare 10
../logs/rv32gc_gshare12.log gshare 12
../logs/rv32gc_gshare14.log gshare 14
../logs/rv32gc_gshare16.log gshare 16
../logs/rv32gc_twobit6.log twobit 6
../logs/rv32gc_twobit8.log twobit 8
../logs/rv32gc_twobit10.log twobit 10
../logs/rv32gc_twobit12.log twobit 12
../logs/rv32gc_twobit14.log twobit 14
../logs/rv32gc_twobit16.log twobit 16
../logs/bpred_GSHARE_6_16_10_0_rv32gc_embench.log gshare 6
../logs/bpred_GSHARE_8_16_10_0_rv32gc_embench.log gshare 8
../logs/bpred_GSHARE_10_16_10_0_rv32gc_embench.log gshare 10
../logs/bpred_GSHARE_12_16_10_0_rv32gc_embench.log gshare 12
../logs/bpred_GSHARE_14_16_10_0_rv32gc_embench.log gshare 14
../logs/bpred_GSHARE_16_16_10_0_rv32gc_embench.log gshare 16
../logs/bpred_TWOBIT_6_16_10_0_rv32gc_embench.log twobit 6
../logs/bpred_TWOBIT_8_16_10_0_rv32gc_embench.log twobit 8
../logs/bpred_TWOBIT_10_16_10_0_rv32gc_embench.log twobit 10
../logs/bpred_TWOBIT_12_16_10_0_rv32gc_embench.log twobit 12
../logs/bpred_TWOBIT_14_16_10_0_rv32gc_embench.log twobit 14
../logs/bpred_TWOBIT_16_16_10_0_rv32gc_embench.log twobit 16

View File

@ -1,6 +1,6 @@
../logs/rv32gc_BTB6.log btb 6
../logs/rv32gc_BTB8.log btb 8
../logs/rv32gc_BTB10.log btb 10
../logs/rv32gc_BTB12.log btb 12
../logs/rv32gc_BTB14.log btb 14
../logs/rv32gc_BTB16.log btb 16
../logs/bpred_GSHARE_16_16_6_0_rv32gc_embench.log btb 6
../logs/bpred_GSHARE_16_16_8_0_rv32gc_embench.log btb 8
../logs/bpred_GSHARE_16_16_10_0_rv32gc_embench.log btb 10
../logs/bpred_GSHARE_16_16_12_0_rv32gc_embench.log btb 12
../logs/bpred_GSHARE_16_16_14_0_rv32gc_embench.log btb 14
../logs/bpred_GSHARE_16_16_16_0_rv32gc_embench.log btb 16

View File

@ -1,6 +1,6 @@
../logs/rv32gc_class6.log class 6
../logs/rv32gc_class8.log class 8
../logs/rv32gc_class10.log class 10
../logs/rv32gc_class12.log class 12
../logs/rv32gc_class14.log class 14
../logs/rv32gc_class16.log class 16
../logs/bpred_GSHARE_16_16_6_1_rv32gc_embench.log btb 6
../logs/bpred_GSHARE_16_16_8_1_rv32gc_embench.log btb 8
../logs/bpred_GSHARE_16_16_10_1_rv32gc_embench.log btb 10
../logs/bpred_GSHARE_16_16_12_1_rv32gc_embench.log btb 12
../logs/bpred_GSHARE_16_16_14_1_rv32gc_embench.log btb 14
../logs/bpred_GSHARE_16_16_16_1_rv32gc_embench.log btb 16

View File

@ -1,5 +1,5 @@
../logs/rv32gc_RAS3.log ras 3
../logs/rv32gc_RAS4.log ras 4
../logs/rv32gc_RAS6.log ras 6
../logs/rv32gc_RAS10.log ras 10
../logs/rv32gc_RAS16.log ras 16
../logs/bpred_GSHARE_10_3_10_0_rv32gc_embench.log ras 3
../logs/bpred_GSHARE_10_4_10_0_rv32gc_embench.log ras 4
../logs/bpred_GSHARE_10_6_10_0_rv32gc_embench.log ras 6
../logs/bpred_GSHARE_10_10_10_0_rv32gc_embench.log ras 10
../logs/bpred_GSHARE_10_16_10_0_rv32gc_embench.log ras 16

View File

@ -11,6 +11,9 @@
#
##################################
import sys,os,shutil
import multiprocessing
class bcolors:
HEADER = '\033[95m'
@ -215,6 +218,55 @@ if (nightly):
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
### branch predictor simulation
["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# btb
["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ras
["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# enable floating-point tests when lint is fixed
# ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
# ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
@ -232,11 +284,17 @@ if (nightly):
for test in derivconfigtests:
config = test[0];
tests = test[1];
if(len(test) >= 4 and test[2] == "configOptions"):
configOptions = test[3]
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config+" configOptions"
else:
configOptions = ""
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
for t in tests:
tc = TestCase(
name=t,
variant=config,
cmd="vsim > {} -c <<!\ndo wally-batch.do "+config+" "+t+"\n!",
cmd=cmdPrefix+" "+t+" "+configOptions+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
@ -306,13 +364,16 @@ def main():
# Also it is slow to run.
# configs.append(getBuildrootTC(boot=False))
os.system('rm -f cov/*.ucdb')
elif '-nightly' in sys.argv:
TIMEOUT_DUR = 60*1440 # 1 day
configs.append(getBuildrootTC(boot=False))
else:
TIMEOUT_DUR = 10*60 # seconds
configs.append(getBuildrootTC(boot=False))
# Scale the number of concurrent processes to the number of test cases, but
# max out at a limited number of concurrent processes to not overwhelm the system
with Pool(processes=min(len(configs),40)) as pool:
with Pool(processes=min(len(configs),multiprocessing.cpu_count())) as pool:
num_fail = 0
results = {}
for config in configs:

View File

@ -89,10 +89,10 @@ if {$2 eq "buildroot"} {
# **** fix this so we can pass any number of +defines.
# only allows 3 right now
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/deriv/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286 $5 $6 $7
vlog -lint -work wkdir/work_${1}_${3}_${4} +incdir+../config/$1 +incdir+../config/deriv/$1 +incdir+../config/shared ../src/cvw.sv ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$4 -o testbenchopt
vopt wkdir/work_${1}_${3}_${4}.testbench -work wkdir/work_${1}_${3}_${4} -G TEST=$3 ${4} -o testbenchopt
vsim -lib wkdir/work_${1}_${3}_${4} testbenchopt -fatal 7 -suppress 3829
# Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf

View File

@ -88,15 +88,17 @@ module bpred import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] BPBTAD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
logic ReturnD, CallD;
logic ReturnE, CallE;
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic BPReturnWrongD;
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
logic BPBranchD, BPJumpD, BPReturnD, BPCallD;
logic ReturnD, CallD;
logic ReturnE, CallE;
logic BranchM, JumpM, ReturnM, CallM;
logic BranchW, JumpW, ReturnW, CallW;
logic BPReturnWrongD;
logic [P.XLEN-1:0] BPBTAE;
logic BPBTAWrongM;
logic PCSrcM;
// Part 1 branch direction prediction
if (P.BPRED_TYPE == `BP_TWOBIT) begin:Predictor
@ -144,6 +146,8 @@ module bpred import cvw::*; #(parameter cvw_t P) (
.BranchD, .BranchE, .BranchM, .PCSrcE);
end
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, PCSrcM);
// Part 2 Branch target address prediction
// BTB contains target address for all CFI
@ -152,6 +156,7 @@ module bpred import cvw::*; #(parameter cvw_t P) (
.PCNextF, .PCF, .PCD, .PCE, .PCM,
.BPBTAF, .BPBTAD, .BPBTAE,
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
.BPBTAWrongM,
.IClassWrongM, .IClassWrongE,
.IEUAdrE, .IEUAdrM,
.InstrClassD({CallD, ReturnD, JumpD, BranchD}),
@ -196,7 +201,7 @@ module bpred import cvw::*; #(parameter cvw_t P) (
if(P.ZIHPM_SUPPORTED) begin
logic [P.XLEN-1:0] RASPCD, RASPCE;
logic BTAWrongE, RASPredPCWrongE;
logic RASPredPCWrongE;
// performance counters
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
@ -208,14 +213,15 @@ module bpred import cvw::*; #(parameter cvw_t P) (
// By pipelining the BTB's PC and RAS address through the pipeline we can measure the accuracy of
// both without the above inaccuracies.
// **** use BPBTAWrongM from BTB.
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
flopenrc #(P.XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
flopenrc #(P.XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
flopenrc #(2) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
{BPDirPredWrongE, RASPredPCWrongE},
{BPDirPredWrongM, RASPredPCWrongM});
assign BTAWrongM = BPBTAWrongM & PCSrcM;
end else begin
assign {BTAWrongM, RASPredPCWrongM} = '0;

View File

@ -39,6 +39,7 @@ module btb import cvw::*; #(parameter cvw_t P,
output logic [P.XLEN-1:0] BPBTAD,
output logic [P.XLEN-1:0] BPBTAE,
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
output logic BPBTAWrongM,
// update
input logic IClassWrongM, // BTB's instruction class guess was wrong
input logic IClassWrongE,
@ -57,7 +58,7 @@ module btb import cvw::*; #(parameter cvw_t P,
logic [P.XLEN-1:0] IEUAdrW;
logic [P.XLEN-1:0] PCW;
logic BTBWrongE, BPBTAWrongE;
logic BTBWrongM, BPBTAWrongM;
logic BTBWrongM;
// hashing function for indexing the PC