Merge pull request #439 from ross144/main

Fixes to branch predictor processing scripts.
This commit is contained in:
David Harris 2023-10-24 08:31:06 -07:00 committed by GitHub
commit ea571a6e3b
4 changed files with 144 additions and 61 deletions

57
bin/CModelBTBAccuracy.sh Executable file
View File

@ -0,0 +1,57 @@
#!/bin/bash
###########################################
## Written: ross1728@gmail.com
## Created: 23 October 2023
## Modified:
##
## Purpose: Takes a directory of branch outcome logs organized as one .log file per benchmark.
## Computes the geometric mean of the BTB miss rate across benchmarks for each swept size.
##
## A component of the CORE-V-WALLY configurable RISC-V project.
##
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
##
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
##
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
## may obtain a copy of the License at
##
## https://solderpad.org/licenses/SHL-2.1/
##
## Unless required by applicable law or agreed to in writing, any work distributed under the
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
## either express or implied. See the License for the specific language governing permissions
## and limitations under the License.
################################################################################################
# Usage: CModelBTBAccuracy.sh <directory>
#
# For every size in 6, 8, ..., 16 this runs sim_bp on each <directory>/*.log
# file, reads a total count and a miss count from sim_bp's output, and prints
# one line "<size> <geometric mean of miss/total over all files>".
#
# Exits with status 1 (message on stderr) if the directory argument is
# missing, if it contains no .log files, or if sim_bp's output cannot be parsed.

Directory="$1"
if [ -z "$Directory" ]; then
    echo "Usage: $0 <directory containing one .log file per benchmark>" >&2
    exit 1
fi

# Expand the glob into an array so paths containing whitespace stay intact.
# nullglob makes an unmatched pattern expand to nothing instead of the literal
# "*.log", which would otherwise be handed to sim_bp as a file name.
shopt -s nullglob
Files=("$Directory"/*.log)
shopt -u nullglob
if [ "${#Files[@]}" -eq 0 ]; then
    echo "Error: no .log files found in '$Directory'" >&2
    exit 1
fi

# Miss is compared with an integer test below, so it must be an integer.
# Total only feeds bc, so a decimal is tolerated; it must not be zero.
IntegerRe='^[0-9]+$'
NumberRe='^[0-9]+([.][0-9]+)?$'
ZeroRe='^[0.]+$'

# Evaluate an expression in bc with 200 fractional digits.
# With such long precision bc wraps its output onto multiple lines using
# backslash-newline continuations; strip both characters to get one number.
Calc() {
    echo "scale=200; $1" | bc -l | tr -d '\\\n'
}

for Size in $(seq 6 2 16)
do
    Product=1.0
    Count=0
    for File in "${Files[@]}"
    do
        # NOTE(review): the meaning of sim_bp's positional arguments is not
        # visible here; presumably $Size is the BTB size being swept - confirm
        # against sim_bp's usage text.
        lines=$(sim_bp gshare 16 16 "$Size" 1 "$File" | tail -5)
        # Total: field 5 of the first of the last five output lines.
        Total=$(echo "$lines" | head -1 | awk '{print $5}')
        # Miss: field 8 of the second-to-last output line.
        Miss=$(echo "$lines" | tail -2 | head -1 | awk '{print $8}')

        if ! [[ $Miss =~ $IntegerRe && $Total =~ $NumberRe ]] || [[ $Total =~ $ZeroRe ]]; then
            echo "Error: could not parse sim_bp output for '$File' (Total='$Total', Miss='$Miss')" >&2
            exit 1
        fi

        if [ "$Miss" -eq 0 ]; then
            # A zero miss count would zero the whole product, so this file
            # contributes 1/Total (i.e. it is counted as a single miss).
            Product=$(Calc "$Product / $Total")
        else
            Product=$(Calc "$Product * $Miss / $Total")
        fi
        Count=$((Count+1))
    done
    # Count >= 1 here because Files is non-empty, so 1/$Count is well defined.
    GeoMean=$(perl -E "say $Product**(1/$Count)")
    echo "$Size $GeoMean"
done

View File

@ -11,6 +11,7 @@
#
##################################
import sys,os,shutil
import argparse
class bcolors:
HEADER = '\033[95m'
@ -46,55 +47,6 @@ configs = [
)
]
# bpdSize = [6, 8, 10, 12, 14, 16]
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# name = CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# for CurrBPSize in bpdSize:
# name = 'BTB'+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE"
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
bpdSize = [2, 3, 4, 6, 10, 16]
for CurrBPSize in bpdSize:
name = 'RAS'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=16" + "+define+RAS_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE+define+RAS_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# LHRSize = [4, 8, 10]
# bpdType = ['local_repair']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# for CurrLHRSize in LHRSize:
# name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
import os
from multiprocessing import Pool, TimeoutError
@ -138,10 +90,71 @@ def main():
finally:
os.mkdir("wkdir")
if '-makeTests' in sys.argv:
os.chdir(regressionDir)
os.system('./make-tests.sh | tee ./logs/make-tests.log')
parser = argparse.ArgumentParser(description='Runs embench with sweeps of branch predictor sizes and types.')
mode = parser.add_mutually_exclusive_group()
mode.add_argument('-r', '--ras', action='store_const', help='Sweep size of return address stack (RAS).', default=False, const=True)
mode.add_argument('-d', '--direction', action='store_const', help='Sweep size of direction prediction (2-bit, Gshare, local, etc).', default=False, const=True)
mode.add_argument('-t', '--target', action='store_const', help='Sweep size of branch target buffer (BTB).', default=False, const=True)
mode.add_argument('-c', '--iclass', action='store_const', help='Sweep size of classification (BTB) Same as -t.', default=False, const=True)
args = parser.parse_args()
if(args.direction):
# for direction predictor size sweep
bpdSize = [6, 8, 10, 12, 14, 16]
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
for CurrBPType in bpdType:
for CurrBPSize in bpdSize:
name = CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
if(args.target or args.iclass):
# BTB and class size sweep
bpdSize = [6, 8, 10, 12, 14, 16]
for CurrBPSize in bpdSize:
name = 'BTB'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+RAS_SIZE=16+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# ras size sweep
if(args.ras):
bpdSize = [2, 3, 4, 6, 10, 16]
for CurrBPSize in bpdSize:
name = 'RAS'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=16" + "+define+RAS_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE+define+RAS_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# LHRSize = [4, 8, 10]
# bpdType = ['local_repair']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# for CurrLHRSize in LHRSize:
# name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
# Scale the number of concurrent processes to the number of test cases, but
# max out at a limited number of concurrent processes to not overwhelm the system
with Pool(processes=min(len(configs),40)) as pool:

View File

@ -51,20 +51,20 @@ module icpred import cvw::*; #(parameter cvw_t P,
// An alternative to using the BTB to store the instruction class is to partially decode
// the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions.
// This logic is not described in the text book as of 23 February 2023.
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
logic cjal, cj, cjr, cjalr, CJumpF, CBranchF;
logic NCJumpF, NCBranchF;
if(P.C_SUPPORTED) begin
logic [4:0] CompressedOpcF;
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32;
assign cjal = CompressedOpcF == 5'h09 & P.XLEN == 32;
assign cj = CompressedOpcF == 5'h0d;
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = ccall | cj | cjr | ccallr;
assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = cjal | cj | cjr | cjalr;
assign CBranchF = CompressedOpcF[4:1] == 4'h7;
end else begin
assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0;
assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0;
end
assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
@ -72,11 +72,11 @@ module icpred import cvw::*; #(parameter cvw_t P,
assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF);
assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF));
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
(P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 & PostSpillInstrRawF[11:7] == 5'b0) | // return must return to ra or r5
(P.C_SUPPORTED & cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
(P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
(P.C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
end else begin
// This section connects the BTB's instruction class prediction.

View File

@ -340,8 +340,21 @@ module ifu import cvw::*; #(parameter cvw_t P) (
end else begin : bpred
mux2 #(P.XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF));
logic BranchM, JumpM, BranchW, JumpW;
logic CallD, CallE, CallM, CallW;
logic ReturnD, ReturnE, ReturnM, ReturnW;
assign BPWrongE = PCSrcE;
assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0;
icpred #(P, 0) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW,
.BTBCallF(1'b0), .BTBReturnF(1'b0), .BTBJumpF(1'b0),
.BTBBranchF(1'b0), .BPCallF(), .BPReturnF(), .BPJumpF(), .BPBranchF(), .IClassWrongM,
.IClassWrongE(), .BPReturnWrongD());
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, BPWrongM);
assign RASPredPCWrongM = '0;
assign BPDirPredWrongM = BPWrongM;
assign BTAWrongM = BPWrongM;
assign InstrClassM = {CallM, ReturnM, JumpM, BranchM};
assign NextValidPCE = PCE;
end