Merge branch 'main' of https://github.com/openhwgroup/cvw into dev

This commit is contained in:
David Harris 2023-10-26 19:02:05 -07:00
commit 734bf021d7
9 changed files with 158 additions and 70 deletions

57
bin/CModelBTBAccuracy.sh Executable file
View File

@ -0,0 +1,57 @@
#!/bin/bash
###########################################
## Written: ross1728@gmail.com
## Created: 23 October 2023
## Modified:
##
## Purpose: Takes a directory of branch outcomes organized as 1 files per benchmark.
## Computes the geometric mean for btb accuracy
##
## A component of the CORE-V-WALLY configurable RISC-V project.
##
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
##
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
##
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
## may obtain a copy of the License at
##
## https:##solderpad.org/licenses/SHL-2.1/
##
## Unless required by applicable law or agreed to in writing, any work distributed under the
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
## either express or implied. See the License for the specific language governing permissions
## and limitations under the License.
################################################################################################
Directory="$1"
Files="$1/*.log"
for Size in $(seq 6 2 16)
do
Product=1.0
Count=0
BMDRArray=()
for File in $Files
do
lines=`sim_bp gshare 16 16 $Size 1 $File | tail -5`
Total=`echo "$lines" | head -1 | awk '{print $5}'`
Miss=`echo "$lines" | tail -2 | head -1 | awk '{print $8}'`
BMDR=`echo "$Miss / $Total" | bc -l`
BMDRArray+=("$BMDR")
if [ $Miss -eq 0 ]; then
Product=`echo "scale=200; $Product / $Total" | bc -l`
else
Product=`echo "scale=200; $Product * $Miss / $Total" | bc -l`
fi
Count=$((Count+1))
done
# with such long precision bc outputs onto multiple lines
# must remove \n and \ from string
Product=`echo "$Product" | tr -d '\n' | tr -d '\\\'`
GeoMean=`perl -E "say $Product**(1/$Count)"`
echo "$Pred$Size $GeoMean"
done

View File

@ -11,6 +11,7 @@
#
##################################
import sys,os,shutil
import argparse
class bcolors:
HEADER = '\033[95m'
@ -46,55 +47,6 @@ configs = [
)
]
# bpdSize = [6, 8, 10, 12, 14, 16]
# bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# name = CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# for CurrBPSize in bpdSize:
# name = 'BTB'+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE"
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
bpdSize = [2, 3, 4, 6, 10, 16]
for CurrBPSize in bpdSize:
name = 'RAS'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=16" + "+define+RAS_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE+define+RAS_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# LHRSize = [4, 8, 10]
# bpdType = ['local_repair']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# for CurrLHRSize in LHRSize:
# name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
import os
from multiprocessing import Pool, TimeoutError
@ -138,10 +90,71 @@ def main():
finally:
os.mkdir("wkdir")
if '-makeTests' in sys.argv:
os.chdir(regressionDir)
os.system('./make-tests.sh | tee ./logs/make-tests.log')
parser = argparse.ArgumentParser(description='Runs embench with sweeps of branch predictor sizes and types.')
mode = parser.add_mutually_exclusive_group()
mode.add_argument('-r', '--ras', action='store_const', help='Sweep size of return address stack (RAS).', default=False, const=True)
mode.add_argument('-d', '--direction', action='store_const', help='Sweep size of direction prediction (2-bit, Gshare, local, etc).', default=False, const=True)
mode.add_argument('-t', '--target', action='store_const', help='Sweep size of branch target buffer (BTB).', default=False, const=True)
mode.add_argument('-c', '--iclass', action='store_const', help='Sweep size of classification (BTB) Same as -t.', default=False, const=True)
args = parser.parse_args()
if(args.direction):
# for direction predictor size sweep
bpdSize = [6, 8, 10, 12, 14, 16]
bpdType = ['twobit', 'gshare', 'global', 'gshare_basic', 'global_basic', 'local_basic']
for CurrBPType in bpdType:
for CurrBPSize in bpdSize:
name = CurrBPType+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=" + str(bpdType.index(CurrBPType)) + "+define+BPRED_SIZE=" + str(CurrBPSize)
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
if(args.target or args.iclass):
# BTB and class size sweep
bpdSize = [6, 8, 10, 12, 14, 16]
for CurrBPSize in bpdSize:
name = 'BTB'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=1 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+RAS_SIZE=16+define+BTB_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# ras size sweep
if(args.ras):
bpdSize = [2, 3, 4, 6, 10, 16]
for CurrBPSize in bpdSize:
name = 'RAS'+str(CurrBPSize)
configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_OVERRIDE +define+BPRED_TYPE=\`BP_GSHARE" + "+define+BPRED_SIZE=16" + "+define+BTB_SIZE=16" + "+define+RAS_SIZE=" + str(CurrBPSize) + "+define+BTB_OVERRIDE+define+RAS_OVERRIDE"
tc = TestCase(
name=name,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
grepstr="")
configs.append(tc)
# bpdSize = [6, 8, 10, 12, 14, 16]
# LHRSize = [4, 8, 10]
# bpdType = ['local_repair']
# for CurrBPType in bpdType:
# for CurrBPSize in bpdSize:
# for CurrLHRSize in LHRSize:
# name = str(CurrLHRSize)+CurrBPType+str(CurrBPSize)
# configOptions = "+define+INSTR_CLASS_PRED=0 +define+BPRED_TYPE=\"BP_" + CurrBPType.upper() + "\" +define+BPRED_SIZE=" + str(CurrBPSize) + " +define+BPRED_NUM_LHR=" + str(CurrLHRSize) + " "
# tc = TestCase(
# name=name,
# variant="rv32gc",
# cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc configOptions " + name + " embench " + configOptions,
# grepstr="")
# configs.append(tc)
# Scale the number of concurrent processes to the number of test cases, but
# max out at a limited number of concurrent processes to not overwhelm the system
with Pool(processes=min(len(configs),40)) as pool:

View File

@ -86,7 +86,7 @@ module hazard (
assign StallFCause = '0;
assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause;
assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause;
assign StallMCause = WFIStallM & ~FlushMCause;
assign StallMCause = WFIStallM & ~FlushMCause;
// Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1.
// assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause;
// Because FlushWCause is a strict subset of FlushDCause, FlushWCause is factored out.

View File

@ -51,20 +51,20 @@ module icpred import cvw::*; #(parameter cvw_t P,
// An alternative to using the BTB to store the instruction class is to partially decode
// the instructions in the Fetch stage into, Call, Return, Jump, and Branch instructions.
// This logic is not described in the text book as of 23 February 2023.
logic ccall, cj, cjr, ccallr, CJumpF, CBranchF;
logic cjal, cj, cjr, cjalr, CJumpF, CBranchF;
logic NCJumpF, NCBranchF;
if(P.C_SUPPORTED) begin
logic [4:0] CompressedOpcF;
assign CompressedOpcF = {PostSpillInstrRawF[1:0], PostSpillInstrRawF[15:13]};
assign ccall = CompressedOpcF == 5'h09 & P.XLEN == 32;
assign cjal = CompressedOpcF == 5'h09 & P.XLEN == 32;
assign cj = CompressedOpcF == 5'h0d;
assign cjr = CompressedOpcF == 5'h14 & ~PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign ccallr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = ccall | cj | cjr | ccallr;
assign cjalr = CompressedOpcF == 5'h14 & PostSpillInstrRawF[12] & PostSpillInstrRawF[6:2] == 5'b0 & PostSpillInstrRawF[11:7] != 5'b0;
assign CJumpF = cjal | cj | cjr | cjalr;
assign CBranchF = CompressedOpcF[4:1] == 4'h7;
end else begin
assign {ccall, cj, cjr, ccallr, CJumpF, CBranchF} = '0;
assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0;
end
assign NCJumpF = PostSpillInstrRawF[6:0] == 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F;
@ -72,11 +72,11 @@ module icpred import cvw::*; #(parameter cvw_t P,
assign BPBranchF = NCBranchF | (P.C_SUPPORTED & CBranchF);
assign BPJumpF = NCJumpF | (P.C_SUPPORTED & (CJumpF));
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01) | // returnurn must returnurn to ra or r5
(P.C_SUPPORTED & (ccallr | cjr) & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign BPReturnF = (NCJumpF & (PostSpillInstrRawF[19:15] & 5'h1B) == 5'h01 & PostSpillInstrRawF[11:7] == 5'b0) | // return must return to ra or r5
(P.C_SUPPORTED & cjr & ((PostSpillInstrRawF[11:7] & 5'h1B) == 5'h01));
assign BPCallF = (NCJumpF & (PostSpillInstrRawF[11:07] & 5'h1B) == 5'h01) | // call(r) must link to ra or x5
(P.C_SUPPORTED & (ccall | (ccallr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
(P.C_SUPPORTED & (cjal | (cjalr & (PostSpillInstrRawF[11:7] & 5'h1b) == 5'h01)));
end else begin
// This section connects the BTB's instruction class prediction.

View File

@ -340,8 +340,21 @@ module ifu import cvw::*; #(parameter cvw_t P) (
end else begin : bpred
mux2 #(P.XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PC1NextF));
logic BranchM, JumpM, BranchW, JumpW;
logic CallD, CallE, CallM, CallW;
logic ReturnD, ReturnE, ReturnM, ReturnW;
assign BPWrongE = PCSrcE;
assign {InstrClassM, BPDirPredWrongM, BTAWrongM, RASPredPCWrongM, IClassWrongM} = '0;
icpred #(P, 0) icpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
.PostSpillInstrRawF, .InstrD, .BranchD, .BranchE, .JumpD, .JumpE, .BranchM, .BranchW, .JumpM, .JumpW,
.CallD, .CallE, .CallM, .CallW, .ReturnD, .ReturnE, .ReturnM, .ReturnW,
.BTBCallF(1'b0), .BTBReturnF(1'b0), .BTBJumpF(1'b0),
.BTBBranchF(1'b0), .BPCallF(), .BPReturnF(), .BPJumpF(), .BPBranchF(), .IClassWrongM,
.IClassWrongE(), .BPReturnWrongD());
flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, BPWrongM);
assign RASPredPCWrongM = '0;
assign BPDirPredWrongM = BPWrongM;
assign BTAWrongM = BPWrongM;
assign InstrClassM = {CallM, ReturnM, JumpM, BranchM};
assign NextValidPCE = PCE;
end

View File

@ -202,7 +202,7 @@ module csr import cvw::*; #(parameter cvw_t P) (
///////////////////////////////////////////
assign CSRAdrM = InstrM[31:20];
assign UnalignedNextEPCM = TrapM ? ((wfiM & IntPendingM) ? PCM+4 : PCM) : CSRWriteValM;
assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM;
assign NextEPCM = P.C_SUPPORTED ? {UnalignedNextEPCM[P.XLEN-1:1], 1'b0} : {UnalignedNextEPCM[P.XLEN-1:2], 2'b00}; // 3.1.15 alignment
assign NextCauseM = TrapM ? {InterruptM, CauseM}: {CSRWriteValM[P.XLEN-1], CSRWriteValM[3:0]};
assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM;

View File

@ -29,7 +29,7 @@
module privdec import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallM,
input logic StallM, StallW, FlushW,
input logic [31:15] InstrM, // privileged instruction function field
input logic PrivilegedM, // is this a privileged instruction (from IEU controller)
input logic IllegalIEUFPUInstrM, // Not a legal IEU instruction
@ -39,7 +39,7 @@ module privdec import cvw::*; #(parameter cvw_t P) (
output logic IllegalInstrFaultM, // Illegal instruction
output logic EcallFaultM, BreakpointFaultM, // Ecall or breakpoint; must retire, so don't flush it when the trap occurs
output logic sretM, mretM, // return instructions
output logic wfiM, sfencevmaM // wfi / sfence.vma / sinval.vma instructions
output logic wfiM, wfiW, sfencevmaM // wfi / sfence.vma / sinval.vma instructions
);
logic rs1zeroM; // rs1 field = 0
@ -86,6 +86,8 @@ module privdec import cvw::*; #(parameter cvw_t P) (
// coverage on
end else assign WFITimeoutM = 0;
flopenrc #(1) wfiWReg(clk, reset, FlushW, ~StallW, wfiM, wfiW);
///////////////////////////////////////////
// Extract exceptions by name and handle them
///////////////////////////////////////////

View File

@ -115,15 +115,17 @@ module privileged import cvw::*; #(parameter cvw_t P) (
logic ExceptionM; // Memory stage instruction caused a fault
logic HPTWInstrAccessFaultM; // Hardware page table access fault while fetching instruction PTE
logic wfiW;
// track the current privilege level
privmode #(P) privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM,
.STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW);
// decode privileged instructions
privdec #(P) pmd(.clk, .reset, .StallM, .InstrM(InstrM[31:15]),
privdec #(P) pmd(.clk, .reset, .StallM, .StallW, .FlushW, .InstrM(InstrM[31:15]),
.PrivilegedM, .IllegalIEUFPUInstrM, .IllegalCSRAccessM,
.PrivilegeModeW, .STATUS_TSR, .STATUS_TVM, .STATUS_TW, .IllegalInstrFaultM,
.EcallFaultM, .BreakpointFaultM, .sretM, .mretM, .wfiM, .sfencevmaM);
.EcallFaultM, .BreakpointFaultM, .sretM, .mretM, .wfiM, .wfiW, .sfencevmaM);
// Control and Status Registers
csr #(P) csr(.clk, .reset, .FlushM, .FlushW, .StallE, .StallM, .StallW,
@ -156,5 +158,5 @@ module privileged import cvw::*; #(parameter cvw_t P) (
.mretM, .sretM, .PrivilegeModeW,
.MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE,
.InstrValidM, .CommittedM, .CommittedF,
.TrapM, .RetM, .wfiM, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .CauseM);
.TrapM, .RetM, .wfiM, .wfiW, .InterruptM, .ExceptionM, .IntPendingM, .DelegateM, .CauseM);
endmodule

View File

@ -33,7 +33,7 @@ module trap import cvw::*; #(parameter cvw_t P) (
input logic LoadAccessFaultM, StoreAmoAccessFaultM, EcallFaultM, InstrPageFaultM,
input logic LoadPageFaultM, StoreAmoPageFaultM, // various trap sources
input logic mretM, sretM, // return instructions
input logic wfiM, // wait for interrupt instruction
input logic wfiM, wfiW, // wait for interrupt instruction
input logic [1:0] PrivilegeModeW, // current privilege mode
input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, // interrupt pending, enabled, and delegate CSRs
input logic [15:0] MEDELEG_REGW, // exception delegation SR
@ -68,7 +68,8 @@ module trap import cvw::*; #(parameter cvw_t P) (
assign Committed = CommittedM | CommittedF;
assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW);
assign ValidIntsM = {12{~Committed}} & EnabledIntsM;
assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request.
assign InterruptM = (|ValidIntsM) & InstrValidM & (~wfiM | wfiW); // suppress interrupt if the memory system has partially processed a request. Delay interrupt until wfi is in the W stage.
// wfiW is to support possible but unlikely back to back wfi instructions. wfiM would be high in the M stage, while also in the W stage.
assign DelegateM = P.S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) &
(PrivilegeModeW == P.U_MODE | PrivilegeModeW == P.S_MODE);