forked from Github_Repos/cvw
Merge branch 'main' of https://github.com/openhwgroup/cvw into bit-manip
This commit is contained in:
commit
07a43e1935
5
.editorconfig
Normal file
5
.editorconfig
Normal file
@ -0,0 +1,5 @@
|
||||
root = true
|
||||
|
||||
[src/**.sv]
|
||||
indent_style = space
|
||||
indent_size = 2
|
16
README.md
16
README.md
@ -261,6 +261,22 @@ $ make all
|
||||
|
||||
Note: When the make tasks complete, you’ll find source code in $RISCV/buildroot/output/build and the executables in $RISCV/buildroot/output/images.
|
||||
|
||||
### Generate load images for linux boot
|
||||
|
||||
The Questa Linux boot uses preloaded bootram and ram memory. We use QEMU to generate these preloaded memory files. The files are written to $RISCV/linux-testvectors.
|
||||
|
||||
cd cvw/linux/testvector-generation
|
||||
./genInitMem.sh
|
||||
|
||||
This may require changing the file permissions on the linux-testvectors directory.
|
||||
|
||||
### Generate QEMU linux trace
|
||||
|
||||
The Linux testbench can compare Wally's committed instructions against QEMU instruction by instruction. To do this, QEMU outputs a log file consisting of all instructions executed. Interrupts are handled by forcing the testbench to generate an interrupt at the same cycle as in QEMU. Generating this trace will take more than 24 hours.
|
||||
|
||||
cd cvw/linux/testvector-generation
|
||||
./genTrace.sh
|
||||
|
||||
### Download Synthesis Libraries
|
||||
|
||||
For logic synthesis, we need a synthesis tool (see Section 3.XREF) and a cell library. Clone the OSU 12-track cell library for the Skywater 130 nm process:
|
||||
|
57
bin/CModelBranchAccuracy.sh
Executable file
57
bin/CModelBranchAccuracy.sh
Executable file
@ -0,0 +1,57 @@
|
||||
#!/bin/bash
|
||||
|
||||
###########################################
|
||||
## Written: ross1728@gmail.com
|
||||
## Created: 12 March 2023
|
||||
## Modified:
|
||||
##
|
||||
## Purpose: Takes a directory of branch outcomes organized as one file per benchmark.
|
||||
## Computes the geometric mean.
|
||||
##
|
||||
## A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
##
|
||||
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
##
|
||||
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
##
|
||||
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
## may obtain a copy of the License at
|
||||
##
|
||||
## https://solderpad.org/licenses/SHL-2.1/
|
||||
##
|
||||
## Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
## either express or implied. See the License for the specific language governing permissions
|
||||
## and limitations under the License.
|
||||
################################################################################################
|
||||
|
||||
|
||||
# Usage: CModelBranchAccuracy.sh <directory>
#
# For each predictor type and each size in 6,8,...,16, runs sim_bp on every
# *.log file in <directory>, multiplies together the 4th field of the last
# line sim_bp prints for each file, and reports the geometric mean of those
# values as "<Pred><Size> <GeoMean>" on stdout.

Directory="${1:-}"
if [[ -z "$Directory" || ! -d "$Directory" ]]; then
  echo "Usage: ${0##*/} <directory of per-benchmark branch logs>" >&2
  exit 1
fi

# Expand the glob into an array so that paths containing spaces stay intact;
# nullglob makes a non-matching pattern expand to nothing instead of itself.
shopt -s nullglob
Files=("$Directory"/*.log)
shopt -u nullglob

if (( ${#Files[@]} == 0 )); then
  echo "${0##*/}: no .log files found in $Directory" >&2
  exit 1
fi

for Pred in "bimodal" "gshare"; do
  for Size in $(seq 6 2 16); do
    # gshare is given the size twice, bimodal once; the trailing "18 1" is
    # passed through unchanged.
    # NOTE(review): meaning of the individual sim_bp arguments is not visible
    # here -- confirm against the sim_bp documentation.
    if [[ "$Pred" == "gshare" ]]; then
      SizeArgs=("$Size" "$Size" 18 1)
    else
      SizeArgs=("$Size" 18 1)
    fi

    Product=1.0
    Count=0
    for File in "${Files[@]}"; do
      # 4th whitespace-separated field of the final line of sim_bp output
      # (presumably the branch direction misprediction rate -- verify).
      BMDR=$(sim_bp "$Pred" "${SizeArgs[@]}" "$File" | tail -1 | awk '{print $4}')
      if [[ -z "$BMDR" ]]; then
        # An empty value would make bc fail and corrupt the running product.
        echo "${0##*/}: no result from sim_bp for $File ($Pred $Size); skipping" >&2
        continue
      fi
      Product=$(echo "$Product * $BMDR" | bc)
      Count=$((Count+1))
    done

    # Guard the 1/Count exponent below against division by zero.
    if (( Count == 0 )); then
      echo "${0##*/}: no usable results for $Pred$Size" >&2
      continue
    fi

    # Geometric mean = Product^(1/Count). Values are passed as arguments
    # rather than interpolated into the Perl program text.
    GeoMean=$(perl -E 'say $ARGV[0]**(1/$ARGV[1])' "$Product" "$Count")
    echo "$Pred$Size $GeoMean"
  done
done
|
52
bin/SeparateBranch.sh
Executable file
52
bin/SeparateBranch.sh
Executable file
@ -0,0 +1,52 @@
|
||||
#!/bin/bash
|
||||
|
||||
###########################################
|
||||
## Written: ross1728@gmail.com
|
||||
## Created: 12 March 2023
|
||||
## Modified:
|
||||
##
|
||||
## Purpose: Converts a single branch.log containing multiple benchmark branch outcomes into
|
||||
## separate files, one for each program.x4
|
||||
##
|
||||
## A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
##
|
||||
## Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
##
|
||||
## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
##
|
||||
## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
## except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
## may obtain a copy of the License at
|
||||
##
|
||||
## https://solderpad.org/licenses/SHL-2.1/
|
||||
##
|
||||
## Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
## License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
## either express or implied. See the License for the specific language governing permissions
|
||||
## and limitations under the License.
|
||||
################################################################################################
|
||||
|
||||
# Usage: SeparateBranch.sh <branch.log>
#
# Splits <branch.log> into branch/<name>_branch.log files. For the i-th line
# containing "END", the lines strictly between the i-th "TRAIN" line and that
# "END" line are written out, named after the i-th "BEGIN" line.

File="${1:-}"
if [[ -z "$File" || ! -r "$File" ]]; then
  echo "Usage: ${0##*/} <branch.log>" >&2
  exit 1
fi

# Line numbers (from grep -n) of each marker, one array element per match.
mapfile -t TrainLineNumberArray < <(grep -n "TRAIN" "$File" | awk -F ':' '{print $1}')
mapfile -t BeginLineNumberArray < <(grep -n "BEGIN" "$File" | awk -F ':' '{print $1}')
mapfile -t EndLineNumberArray < <(grep -n "END" "$File" | awk -F ':' '{print $1}')

# Output name per benchmark: the 6th and 4th '/'-separated fields of each
# BEGIN line, concatenated.
# NOTE(review): in awk `_` is an unset variable here, so no underscore is
# emitted between the two fields; confirm whether `$6 "_" $4` was intended.
mapfile -t NameArray < <(grep -n "BEGIN" "$File" | awk -F '/' '{print $6_$4}')

# Progress output: names, BEGIN line numbers and END line numbers.
echo "${NameArray[*]}"
echo "${BeginLineNumberArray[*]}"
echo "${EndLineNumberArray[*]}"

mkdir -p branch || exit 1

Length=${#EndLineNumberArray[@]}
for ((i = 0; i < Length; i++)); do
  # Without a matching name and TRAIN marker the extracted range would be
  # wrong (it would silently start at line 1), so skip such entries.
  if (( i >= ${#NameArray[@]} || i >= ${#TrainLineNumberArray[@]} )); then
    echo "${0##*/}: END marker $((i+1)) has no matching BEGIN/TRAIN marker; skipping" >&2
    continue
  fi

  CurrName=${NameArray[i]}
  CurrTrain=$((TrainLineNumberArray[i] + 1))   # first line after TRAIN
  CurrEnd=$((EndLineNumberArray[i] - 1))       # last line before END
  echo "$CurrName, $CurrTrain, $CurrEnd"
  sed -n "${CurrTrain},${CurrEnd}p" "$File" > "branch/${CurrName}_branch.log"
done
|
@ -30,6 +30,18 @@ import sys
|
||||
import matplotlib.pyplot as plt
|
||||
import re
|
||||
|
||||
#RefData={'twobitCModel' :(['6', '8', '10', '12', '14', '16'],
|
||||
# [11.0680836450622, 8.53864970807778, 7.59565430177984, 6.38741598498948, 5.83662961500838, 5.83662961500838]),
|
||||
# 'gshareCModel' : (['6', '8', '10', '12', '14', '16'],
|
||||
# [14.5859173702079, 12.3634674403619, 10.5806018170154, 8.38831266973592, 6.37097544620762, 3.52638362703015])
|
||||
#}
|
||||
|
||||
RefData = [('twobitCModel6', 11.0501534891674), ('twobitCModel8', 8.51829052266352), ('twobitCModel10', 7.56775222626483),
|
||||
('twobitCModel12', 6.31366834586515), ('twobitCModel14', 5.72699936834177), ('twobitCModel16', 5.72699936834177),
|
||||
('gshareCModel6', 14.5731555979574), ('gshareCModel8', 12.3155658100497), ('gshareCModel10', 10.4589596630561),
|
||||
('gshareCModel12', 8.25796055444401), ('gshareCModel14', 6.23093702707613), ('gshareCModel16', 3.34001125650374)]
|
||||
|
||||
|
||||
def ComputeCPI(benchmark):
|
||||
'Computes and inserts CPI into benchmark stats.'
|
||||
(nameString, opt, dataDict) = benchmark
|
||||
@ -221,14 +233,15 @@ if(sys.argv[1] == '-b'):
|
||||
for benchmark in benchmarkAll:
|
||||
(name, opt, config, dataDict) = benchmark
|
||||
if name+'_'+opt in benchmarkDict:
|
||||
benchmarkDict[name+'_'+opt].append((config, dataDict['BTMR']))
|
||||
benchmarkDict[name+'_'+opt].append((config, dataDict['BDMR']))
|
||||
else:
|
||||
benchmarkDict[name+'_'+opt] = [(config, dataDict['BTMR'])]
|
||||
benchmarkDict[name+'_'+opt] = [(config, dataDict['BDMR'])]
|
||||
|
||||
size = len(benchmarkDict)
|
||||
index = 1
|
||||
if(summery == 0):
|
||||
#print('Number of plots', size)
|
||||
|
||||
for benchmarkName in benchmarkDict:
|
||||
currBenchmark = benchmarkDict[benchmarkName]
|
||||
(names, values) = FormatToPlot(currBenchmark)
|
||||
@ -241,6 +254,8 @@ if(sys.argv[1] == '-b'):
|
||||
index += 1
|
||||
else:
|
||||
combined = benchmarkDict['All_']
|
||||
# merge the reference data into rtl data
|
||||
combined.extend(RefData)
|
||||
(name, value) = FormatToPlot(combined)
|
||||
lst = []
|
||||
dct = {}
|
||||
@ -264,8 +279,8 @@ if(sys.argv[1] == '-b'):
|
||||
dct[PredType] = (currSize, currPercent)
|
||||
print(dct)
|
||||
fig, axes = plt.subplots()
|
||||
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x'}
|
||||
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue'}
|
||||
marker={'twobit' : '^', 'gshare' : 'o', 'global' : 's', 'gshareBasic' : '*', 'globalBasic' : 'x', 'btb': 'x', 'twobitCModel' : 'x', 'gshareCModel' : '*'}
|
||||
colors={'twobit' : 'black', 'gshare' : 'blue', 'global' : 'dodgerblue', 'gshareBasic' : 'turquoise', 'globalBasic' : 'lightsteelblue', 'btb' : 'blue', 'twobitCModel' : 'gray', 'gshareCModel' : 'dodgerblue'}
|
||||
for cat in dct:
|
||||
(x, y) = dct[cat]
|
||||
x=[int(2**int(v)) for v in x]
|
||||
|
1
bin/sim_bp
Symbolic link
1
bin/sim_bp
Symbolic link
@ -0,0 +1 @@
|
||||
../addins/branch-predictor-simulator/src/sim_bp
|
@ -62,7 +62,7 @@ make install
|
||||
# elf2hex
|
||||
cd $RISCV
|
||||
#export PATH=$RISCV/riscv-gnu-toolchain/bin:$PATH
|
||||
gexport PATH=$RISCV/bin:$PATH
|
||||
export PATH=$RISCV/bin:$PATH
|
||||
git clone https://github.com/sifive/elf2hex.git
|
||||
cd elf2hex
|
||||
autoreconf -i
|
||||
|
5
setup.sh
5
setup.sh
@ -48,8 +48,9 @@ if [ -e "$IDV" ]; then
|
||||
export IMPERAS_HOME=$IDV/Imperas
|
||||
export IMPERAS_PERSONALITY=CPUMAN_DV_ASYNC
|
||||
export ROOTDIR=~/
|
||||
source ${IDV}/Imperas/bin/setup.sh
|
||||
setupImperas ${IDV}/Imperas
|
||||
source ${IMPERAS_HOME}/bin/setup.sh
|
||||
setupImperas ${IMPERAS_HOME}
|
||||
export PATH=$IDV/scripts/cvw:$PATH
|
||||
fi
|
||||
|
||||
|
||||
|
@ -1,42 +1,55 @@
|
||||
#--showoverrides
|
||||
#--help --helpall
|
||||
#--showcommands
|
||||
|
||||
--override cpu/show_c_prefix=T
|
||||
# Core settings
|
||||
--override cpu/unaligned=F
|
||||
--override cpu/ignore_non_leaf_DAU=1
|
||||
--override cpu/wfi_is_nop=T
|
||||
--override cpu/mimpid=0x100
|
||||
--override cpu/misa_Extensions_mask=0x0
|
||||
|
||||
# this should be 16 not 0
|
||||
# THIS NEEDS FIXING to 16
|
||||
--override cpu/PMP_registers=0
|
||||
|
||||
# PMA Settings
|
||||
# 'r': read access allowed
|
||||
# 'w': write access allowed
|
||||
# 'x': execute access allowed
|
||||
# 'a': aligned access required
|
||||
# 'A': atomic instructions NOT allowed (actually USER1 privilege needed)
|
||||
# 'P': push/pop instructions NOT allowed (actually USER2 privilege needed)
|
||||
# '1': 1-byte accesses allowed
|
||||
# '2': 2-byte accesses allowed
|
||||
# '4': 4-byte accesses allowed
|
||||
# '8': 8-byte accesses allowed
|
||||
# '-', space: ignored (use for input string formatting).
|
||||
#
|
||||
# SV39 Memory 0x0000000000 0x7FFFFFFFFF
|
||||
#
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0x7FFFFFFFFF -attributes " ------ ---- "; # INITIAL
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000001000 -hi 0x0000001FFF -attributes " r-x-A- 1248 "; # BOOTROM
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000012100 -hi 0x000001211F -attributes " rw--A- --48 "; # SDC
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0002000000 -hi 0x000200FFFF -attributes " rw--A- 1248 "; # CLINT
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x000C000000 -hi 0x000FFFFFFF -attributes " rw--A- --4- "; # PLIC
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0010000000 -hi 0x0010000007 -attributes " rw--A- 1--- "; # UART0 error - 0x10000000 - 0x100000FF
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0010060000 -hi 0x00100600FF -attributes " rw--A- --4- "; # GPIO error - 0x10006000 - 0x100060FF
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0080000000 -hi 0x008FFFFFFF -attributes " rwx--- 1248 "; # UNCORE_RAM
|
||||
|
||||
# Enable the Imperas instruction coverage
|
||||
#-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0
|
||||
#-override refRoot/cpu/cv/cover=basic
|
||||
#-override refRoot/cpu/cv/extensions=RV32I
|
||||
|
||||
# Add Imperas simulator application instruction tracing
|
||||
--trace
|
||||
--tracechange
|
||||
--traceshowicount
|
||||
--tracemode
|
||||
--tracemem ASX
|
||||
--monitornetschange
|
||||
--override cpu/show_c_prefix=T
|
||||
--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange
|
||||
|
||||
# Exceptions and pagetables debug
|
||||
--override cpu/debugflags=6
|
||||
|
||||
# Turn on verbose output for Imperas simulator
|
||||
# Turn on verbose output for Imperas simulator and Model
|
||||
--verbose
|
||||
|
||||
# Turn on verbose output for RISCV model
|
||||
--override cpu/verbose=1
|
||||
|
||||
# Store simulator output to logfile
|
||||
--output imperas.log
|
||||
|
||||
|
||||
# ignore settings of bits DAU for non leaf page table walks
|
||||
--override cpu/ignore_non_leaf_DAU=1
|
||||
|
||||
# mimpid = 0x100
|
||||
--override cpu/mimpid=0x100
|
||||
|
@ -27,6 +27,8 @@ from collections import namedtuple
|
||||
regressionDir = os.path.dirname(os.path.abspath(__file__))
|
||||
os.chdir(regressionDir)
|
||||
|
||||
coverage = '-coverage' in sys.argv
|
||||
|
||||
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
|
||||
# name: the name of this test configuration (used in printing human-readable
|
||||
# output and picking logfile names)
|
||||
@ -66,14 +68,6 @@ tc = TestCase(
|
||||
configs.append(tc)
|
||||
|
||||
tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
|
||||
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "wally64a", "wally64periph", "wally64priv"]
|
||||
for test in tests64gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests64i = ["arch64i"]
|
||||
for test in tests64i:
|
||||
@ -131,6 +125,20 @@ for test in ahbTests:
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc ahb "+test[0]+" "+test[1]+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"]
|
||||
if (coverage): # delete all but 64gc tests when running coverage
|
||||
configs = []
|
||||
coverStr = '-coverage'
|
||||
else:
|
||||
coverStr = ''
|
||||
for test in tests64gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
|
||||
import os
|
||||
@ -158,7 +166,7 @@ def run_test_case(config):
|
||||
|
||||
def main():
|
||||
"""Run the tests and count the failures"""
|
||||
global configs
|
||||
global configs, coverage
|
||||
try:
|
||||
os.chdir(regressionDir)
|
||||
os.mkdir("logs")
|
||||
@ -183,6 +191,10 @@ def main():
|
||||
elif '-buildroot' in sys.argv:
|
||||
TIMEOUT_DUR = 30*7200 # seconds
|
||||
configs=[getBuildrootTC(boot=True)]
|
||||
elif '-coverage' in sys.argv:
|
||||
TIMEOUT_DUR = 20*60 # seconds
|
||||
#configs.append(getBuildrootTC(boot=False))
|
||||
os.system('rm cov/*.ucdb')
|
||||
else:
|
||||
TIMEOUT_DUR = 10*60 # seconds
|
||||
configs.append(getBuildrootTC(boot=False))
|
||||
@ -201,6 +213,12 @@ def main():
|
||||
num_fail+=1
|
||||
print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR))
|
||||
|
||||
# Coverage report
|
||||
if coverage:
|
||||
print('Generating coverage report')
|
||||
os.system('vcover merge -out cov/cov.ucdb cov/rv64gc_arch64i.ucdb cov/rv64gc*.ucdb -logfile cov/log')
|
||||
os.system('vcover report -details cov/cov.ucdb > cov/rv64gc_coverage.rpt')
|
||||
os.system('vcover report -html cov/cov.ucdb')
|
||||
# Count the number of failures
|
||||
if num_fail:
|
||||
print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail)
|
||||
|
@ -40,6 +40,17 @@ if {$2 eq "ahb"} {
|
||||
}
|
||||
vlib wkdir/work_${1}_${2}
|
||||
}
|
||||
# Create directory for coverage data
|
||||
mkdir -p cov
|
||||
|
||||
# Check if measuring coverage
|
||||
set coverage 0
|
||||
if {$argc >= 3} {
|
||||
if {$3 eq "-coverage"} {
|
||||
set coverage 1
|
||||
}
|
||||
}
|
||||
|
||||
# compile source files
|
||||
# suppress spurious warnings about
|
||||
# "Extra checking for conflicts with always_comb done at vopt time"
|
||||
@ -112,20 +123,26 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
vlog -lint -work wkdir/work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 -suppress 7063,2596,13286
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G TEST=$2 -o testbenchopt
|
||||
vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 -suppress 3829
|
||||
# Adding coverage increases runtime from 2:00 to 4:29. Can't run it all the time
|
||||
#vopt work_$2.testbench -work work_$2 -o workopt_$2 +cover=sbectf
|
||||
#vsim -coverage -lib work_$2 workopt_$2
|
||||
if {$coverage} {
|
||||
vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G TEST=$2 -o testbenchopt +cover=sbectf
|
||||
vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 -suppress 3829 -coverage
|
||||
} else {
|
||||
vopt wkdir/work_${1}_${2}.testbench -work wkdir/work_${1}_${2} -G TEST=$2 -o testbenchopt
|
||||
vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 -suppress 3829
|
||||
}
|
||||
# vsim -lib wkdir/work_${1}_${2} testbenchopt -fatal 7 -suppress 3829
|
||||
# power add generates the logging necessary for said generation.
|
||||
# power add -r /dut/core/*
|
||||
run -all
|
||||
# power off -r /dut/core/*
|
||||
}
|
||||
|
||||
#coverage report -file wally-coverage.txt
|
||||
if {$coverage} {
|
||||
do coverage-exclusions.do
|
||||
coverage save -instance /testbench/dut cov/${1}_${2}.ucdb
|
||||
}
|
||||
|
||||
# These aren't doing anything helpful
|
||||
#coverage report -memory
|
||||
#profile report -calltree -file wally-calltree.rpt -cutoff 2
|
||||
#power report -all -bsaif power.saif
|
||||
quit
|
||||
|
@ -24,6 +24,7 @@ vlib work
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
# *** modelsim won't take `PA_BITS, but will take other defines for the lengths of DTIM_RANGE and IROM_LEN. For now just live with the warnings.
|
||||
|
||||
vlog +incdir+../config/$1 \
|
||||
+incdir+../config/shared \
|
||||
+define+USE_IMPERAS_DV \
|
||||
@ -42,6 +43,7 @@ vlog +incdir+../config/$1 \
|
||||
../src/*/*/*.sv \
|
||||
-suppress 2583 \
|
||||
-suppress 7063
|
||||
|
||||
vopt +acc work.testbench -G DEBUG=1 -o workopt
|
||||
vsim workopt +nowarn3829 -fatal 7 \
|
||||
-sv_lib $env(IMPERAS_HOME)/lib/Linux64/ImperasLib/imperas.com/verification/riscv/1.0/model \
|
||||
|
@ -366,11 +366,9 @@ add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VI
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -color Gold /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/WalkerState
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/HPTWAdr
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PCFSpill
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/NextPageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PageType
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ValidNonLeafPTE
|
||||
add wave -noupdate -group lsu -expand -group ptwalker /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/PCFSpill
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/ITLBMissF
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM
|
||||
add wave -noupdate -group lsu -expand -group ptwalker -expand -group types /testbench/dut/core/lsu/VIRTMEM_SUPPORTED/hptw/ITLBWriteF
|
||||
@ -467,7 +465,6 @@ add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/d
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallD
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF
|
||||
add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF
|
||||
add wave -noupdate -group ifu -group Spill /testbench/dut/core/ifu/SelNextSpillF
|
||||
add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE
|
||||
add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HBURST
|
||||
add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HTRANS
|
||||
@ -635,8 +632,10 @@ add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/DCacheMis
|
||||
add wave -noupdate /testbench/dut/core/priv/priv/csr/counters/counters/InstrValidNotFlushedM
|
||||
add wave -noupdate /testbench/clk
|
||||
add wave -noupdate /testbench/HPMCSample/InitialHPMCOUNTERH
|
||||
add wave -noupdate /testbench/HPMCSample/EndSample
|
||||
add wave -noupdate /testbench/HPMCSample/StartSample
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {49231900 ns} 0} {{Cursor 5} {394987 ns} 1}
|
||||
WaveRestoreCursors {{Cursor 2} {314596 ns} 1} {{Cursor 3} {314460 ns} 1} {{Cursor 4} {391801 ns} 1} {{Cursor 4} {23 ns} 0} {{Cursor 5} {394987 ns} 1}
|
||||
quietly wave cursor active 4
|
||||
configure wave -namecolwidth 250
|
||||
configure wave -valuecolwidth 194
|
||||
@ -652,4 +651,4 @@ configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {49231842 ns} {49231960 ns}
|
||||
WaveRestoreZoom {0 ns} {52 ns}
|
||||
|
24
src/cache/cache.sv
vendored
24
src/cache/cache.sv
vendored
@ -29,7 +29,7 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, DCACHE) (
|
||||
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTERVAL, READ_ONLY_CACHE) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
|
||||
@ -39,7 +39,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
input logic [1:0] CacheAtomic, // Atomic operation
|
||||
input logic FlushCache, // Flush all dirty lines back to memory
|
||||
input logic InvalidateCache, // Clear all valid bits
|
||||
input logic [11:0] NextAdr, // Virtual address, but we only use the lower 12 bits.
|
||||
input logic [11:0] NextSet, // Virtual address, but we only use the lower 12 bits.
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [(WORDLEN-1)/8:0] ByteMask, // Which bytes to write (D$ only)
|
||||
input logic [WORDLEN-1:0] CacheWriteData, // Data to write to cache (D$ only)
|
||||
@ -50,7 +50,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
output logic CacheMiss, // Cache miss
|
||||
output logic CacheAccess, // Cache access
|
||||
// lsu control
|
||||
input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextAdr
|
||||
input logic SelHPTW, // Use PAdr from Hardware Page Table Walker rather than NextSet
|
||||
// Bus fsm interface
|
||||
input logic CacheBusAck, // Bus operation completed
|
||||
input logic SelBusBeat, // Word in cache line comes from BeatCount
|
||||
@ -74,7 +74,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
|
||||
logic SelAdr;
|
||||
logic [1:0] AdrSelMuxSel;
|
||||
logic [SETLEN-1:0] CAdr;
|
||||
logic [SETLEN-1:0] CacheSet;
|
||||
logic [LINELEN-1:0] LineWriteData;
|
||||
logic ClearValid, ClearDirty, SetDirty, SetValid;
|
||||
logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0];
|
||||
@ -106,24 +106,24 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
// Read Path
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Choose read address (CAdr). Normally use NextAdr, but use PAdr during stalls
|
||||
// Choose read address (CacheSet). Normally use NextSet, but use PAdr during stalls
|
||||
// and FlushAdr when handling D$ flushes
|
||||
// The icache must update to the newest PCNextF on flush as it is probably a trap. Trap
|
||||
// sets PCNextF to XTVEC and the icache must start reading the instruction.
|
||||
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((DCACHE == 0) & FlushStage))};
|
||||
mux3 #(SETLEN) AdrSelMux(NextAdr[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
|
||||
AdrSelMuxSel, CAdr);
|
||||
assign AdrSelMuxSel = {SelFlush, ((SelAdr | SelHPTW) & ~((READ_ONLY_CACHE == 1) & FlushStage))};
|
||||
mux3 #(SETLEN) AdrSelMux(NextSet[SETTOP-1:OFFSETLEN], PAdr[SETTOP-1:OFFSETLEN], FlushAdr,
|
||||
AdrSelMuxSel, CacheSet);
|
||||
|
||||
// Array of cache ways, along with victim, hit, dirty, and read merging logic
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, DCACHE) CacheWays[NUMWAYS-1:0](
|
||||
.clk, .reset, .CacheEn, .CAdr, .PAdr, .LineWriteData, .LineByteMask,
|
||||
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0](
|
||||
.clk, .reset, .CacheEn, .CacheSet, .PAdr, .LineWriteData, .LineByteMask,
|
||||
.SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelWriteback, .VictimWay,
|
||||
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache);
|
||||
|
||||
// Select victim way for associative caches
|
||||
if(NUMWAYS > 1) begin:vict
|
||||
cacheLRU #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cacheLRU(
|
||||
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CAdr, .LRUWriteEn(LRUWriteEn & ~FlushStage),
|
||||
.clk, .reset, .CacheEn, .FlushStage, .HitWay, .ValidWay, .VictimWay, .CacheSet, .LRUWriteEn(LRUWriteEn & ~FlushStage),
|
||||
.SetValid, .PAdr(PAdr[SETTOP-1:OFFSETLEN]), .InvalidateCache, .FlushCache);
|
||||
end else
|
||||
assign VictimWay = 1'b1; // one hot.
|
||||
@ -138,7 +138,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
|
||||
or_rows #(NUMWAYS, TAGLEN) TagAOMux(.a(TagWay), .y(Tag));
|
||||
|
||||
// Data cache needs to choose word offset from PAdr or BeatCount to writeback dirty lines
|
||||
if(DCACHE)
|
||||
if(!READ_ONLY_CACHE)
|
||||
mux2 #(LOGBWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
|
||||
.d1(BeatCount), .s(SelBusBeat),
|
||||
.y(WordOffsetAddr));
|
||||
|
9
src/cache/cacheLRU.sv
vendored
9
src/cache/cacheLRU.sv
vendored
@ -37,7 +37,7 @@ module cacheLRU
|
||||
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
|
||||
input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag
|
||||
input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag
|
||||
input logic [SETLEN-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
|
||||
input logic [SETLEN-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
|
||||
input logic [SETLEN-1:0] PAdr, // Physical address
|
||||
input logic LRUWriteEn, // Update the LRU state
|
||||
input logic SetValid, // Set the dirty bit in the selected way and set
|
||||
@ -124,8 +124,7 @@ module cacheLRU
|
||||
|
||||
// LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice.
|
||||
// This is a two port memory.
|
||||
// Every cycle must read from CAdr and each load/store must write the new LRU.
|
||||
// this is still wrong.***************************
|
||||
// Every cycle must read from CacheSet and each load/store must write the new LRU.
|
||||
always_ff @(posedge clk) begin
|
||||
if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
|
||||
if(CacheEn) begin
|
||||
@ -133,10 +132,10 @@ module cacheLRU
|
||||
else if (LRUWriteEn & ~FlushStage) begin
|
||||
LRUMemory[PAdr] <= NextLRU;
|
||||
end
|
||||
if(LRUWriteEn & ~FlushStage & (PAdr == CAdr))
|
||||
if(LRUWriteEn & ~FlushStage & (PAdr == CacheSet))
|
||||
CurrLRU <= #1 NextLRU;
|
||||
else
|
||||
CurrLRU <= #1 LRUMemory[CAdr];
|
||||
CurrLRU <= #1 LRUMemory[CacheSet];
|
||||
end
|
||||
end
|
||||
|
||||
|
18
src/cache/cacheway.sv
vendored
18
src/cache/cacheway.sv
vendored
@ -30,12 +30,12 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
OFFSETLEN = 5, INDEXLEN = 9, DIRTY_BITS = 1) (
|
||||
OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
|
||||
input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant
|
||||
input logic [$clog2(NUMLINES)-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
|
||||
input logic [$clog2(NUMLINES)-1:0] CacheSet, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr
|
||||
input logic [`PA_BITS-1:0] PAdr, // Physical address
|
||||
input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only)
|
||||
input logic SetValid, // Set the dirty bit in the selected way and set
|
||||
@ -114,7 +114,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk, .ce(CacheEn),
|
||||
.addr(CAdr), .dout(ReadTag), .bwe('1),
|
||||
.addr(CacheSet), .dout(ReadTag), .bwe('1),
|
||||
.din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN));
|
||||
|
||||
|
||||
@ -136,7 +136,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
localparam LOGNUMSRAM = $clog2(NUMSRAM);
|
||||
|
||||
for(words = 0; words < NUMSRAM; words++) begin: word
|
||||
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CAdr),
|
||||
ram1p1rwbe #(.DEPTH(NUMLINES), .WIDTH(SRAMLEN)) CacheDataMem(.clk, .ce(CacheEn), .addr(CacheSet),
|
||||
.dout(ReadDataLine[SRAMLEN*(words+1)-1:SRAMLEN*words]),
|
||||
.din(LineWriteData[SRAMLEN*(words+1)-1:SRAMLEN*words]),
|
||||
.we(SelectedWriteWordEn), .bwe(FinalByteMask[SRAMLENINBYTES*(words+1)-1:SRAMLENINBYTES*words]));
|
||||
@ -152,9 +152,9 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
always_ff @(posedge clk) begin // Valid bit array,
|
||||
if (reset) ValidBits <= #1 '0;
|
||||
if(CacheEn) begin
|
||||
ValidWay <= #1 ValidBits[CAdr];
|
||||
ValidWay <= #1 ValidBits[CacheSet];
|
||||
if(InvalidateCache) ValidBits <= #1 '0;
|
||||
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay;
|
||||
else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay;
|
||||
end
|
||||
end
|
||||
|
||||
@ -163,13 +163,13 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Dirty bits
|
||||
if (DIRTY_BITS) begin:dirty
|
||||
if (!READ_ONLY_CACHE) begin:dirty
|
||||
always_ff @(posedge clk) begin
|
||||
// reset is optional. Consider merging with TAG array in the future.
|
||||
//if (reset) DirtyBits <= #1 {NUMLINES{1'b0}};
|
||||
if(CacheEn) begin
|
||||
Dirty <= #1 DirtyBits[CAdr];
|
||||
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CAdr] <= #1 SetDirtyWay;
|
||||
Dirty <= #1 DirtyBits[CacheSet];
|
||||
if((SetDirtyWay | ClearDirtyWay) & ~FlushStage) DirtyBits[CacheSet] <= #1 SetDirtyWay;
|
||||
end
|
||||
end
|
||||
end else assign Dirty = 1'b0;
|
||||
|
@ -69,6 +69,6 @@ module fdivsqrtexpcalc(
|
||||
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
// correct exponent for subnormal input's normalization shifts
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
|
||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}}; // *** why Xzero? Is this a hack for postprocessor?
|
||||
assign Qe = Sqrt ? SExp : DExp;
|
||||
endmodule
|
||||
|
@ -151,7 +151,7 @@ module fdivsqrtpreproc (
|
||||
lzc #(`DIVb) lzcY (IFNormLenD, mE);
|
||||
|
||||
// Normalization shift
|
||||
assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1});
|
||||
assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, 1'b1}); // *** try to remove this +1
|
||||
assign DPreproc = IFNormLenD << (mE + {{`DIVBLEN{1'b0}}, 1'b1});
|
||||
|
||||
// append leading 1 (for normal inputs)
|
||||
|
@ -43,7 +43,7 @@ module hazard (
|
||||
);
|
||||
|
||||
logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause;
|
||||
logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW;
|
||||
logic LatestUnstalledD, LatestUnstalledE, LatestUnstalledM, LatestUnstalledW;
|
||||
logic FlushDCause, FlushECause, FlushMCause, FlushWCause;
|
||||
|
||||
// stalls and flushes
|
||||
@ -95,14 +95,14 @@ module hazard (
|
||||
assign #1 StallW = StallWCause;
|
||||
|
||||
// detect the first stage that is not stalled
|
||||
assign FirstUnstalledD = ~StallD & StallF;
|
||||
assign FirstUnstalledE = ~StallE & StallD;
|
||||
assign FirstUnstalledM = ~StallM & StallE;
|
||||
assign FirstUnstalledW = ~StallW & StallM;
|
||||
assign LatestUnstalledD = ~StallD & StallF;
|
||||
assign LatestUnstalledE = ~StallE & StallD;
|
||||
assign LatestUnstalledM = ~StallM & StallE;
|
||||
assign LatestUnstalledW = ~StallW & StallM;
|
||||
|
||||
// Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush
|
||||
assign #1 FlushD = FirstUnstalledD | FlushDCause;
|
||||
assign #1 FlushE = FirstUnstalledE | FlushECause;
|
||||
assign #1 FlushM = FirstUnstalledM | FlushMCause;
|
||||
assign #1 FlushW = FirstUnstalledW | FlushWCause;
|
||||
assign #1 FlushD = LatestUnstalledD | FlushDCause;
|
||||
assign #1 FlushE = LatestUnstalledE | FlushECause;
|
||||
assign #1 FlushM = LatestUnstalledM | FlushMCause;
|
||||
assign #1 FlushW = LatestUnstalledW | FlushWCause;
|
||||
endmodule
|
||||
|
@ -116,10 +116,6 @@ module alu #(parameter WIDTH=32) (
|
||||
assign LT = Asign & ~Bsign | Asign & Neg | ~Bsign & Neg;
|
||||
assign LTU = ~Carry;
|
||||
|
||||
// SLT
|
||||
assign SLT = {{(WIDTH-1){1'b0}}, LT};
|
||||
assign SLTU = {{(WIDTH-1){1'b0}}, LTU};
|
||||
|
||||
// Select appropriate ALU Result
|
||||
if (`ZBS_SUPPORTED | `ZBB_SUPPORTED) begin
|
||||
always_comb
|
||||
|
@ -71,7 +71,7 @@ module bpred (
|
||||
|
||||
logic [1:0] BPDirPredF;
|
||||
|
||||
logic [`XLEN-1:0] BTAF, RASPCF;
|
||||
logic [`XLEN-1:0] BPBTAF, RASPCF;
|
||||
logic BPPCWrongE;
|
||||
logic IClassWrongE;
|
||||
logic BPDirPredWrongE;
|
||||
@ -85,7 +85,7 @@ module bpred (
|
||||
logic BTBTargetWrongE;
|
||||
logic RASTargetWrongE;
|
||||
|
||||
logic [`XLEN-1:0] BTAD;
|
||||
logic [`XLEN-1:0] BPBTAD;
|
||||
|
||||
logic BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF;
|
||||
logic BPBranchF, BPJumpF, BPReturnF, BPCallF;
|
||||
@ -95,7 +95,7 @@ module bpred (
|
||||
logic BranchM, JumpM, ReturnM, CallM;
|
||||
logic BranchW, JumpW, ReturnW, CallW;
|
||||
logic BPReturnWrongD;
|
||||
logic [`XLEN-1:0] BTAE;
|
||||
logic [`XLEN-1:0] BPBTAE;
|
||||
|
||||
|
||||
|
||||
@ -150,7 +150,7 @@ module bpred (
|
||||
btb #(`BTB_SIZE)
|
||||
TargetPredictor(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .StallW, .FlushD, .FlushE, .FlushM, .FlushW,
|
||||
.PCNextF, .PCF, .PCD, .PCE, .PCM,
|
||||
.BTAF, .BTAD, .BTAE,
|
||||
.BPBTAF, .BPBTAD, .BPBTAE,
|
||||
.BTBIClassF({BTBCallF, BTBReturnF, BTBJumpF, BTBBranchF}),
|
||||
.IClassWrongM, .IClassWrongE,
|
||||
.IEUAdrE, .IEUAdrM,
|
||||
@ -181,7 +181,7 @@ module bpred (
|
||||
|
||||
// Output the predicted PC or corrected PC on miss-predict.
|
||||
assign BPPCSrcF = (BPBranchF & BPDirPredF[1]) | BPJumpF;
|
||||
mux2 #(`XLEN) pcmuxbp(BTAF, RASPCF, BPReturnF, BPPCF);
|
||||
mux2 #(`XLEN) pcmuxbp(BPBTAF, RASPCF, BPReturnF, BPPCF);
|
||||
// Selects the BP or PC+2/4.
|
||||
mux2 #(`XLEN) pcmux0(PCPlus2or4F, BPPCF, BPPCSrcF, PC0NextF);
|
||||
// If the prediction is wrong select the correct address.
|
||||
@ -196,7 +196,7 @@ module bpred (
|
||||
|
||||
if(`ZICOUNTERS_SUPPORTED) begin
|
||||
logic [`XLEN-1:0] RASPCD, RASPCE;
|
||||
logic BTBPredPCWrongE, RASPredPCWrongE;
|
||||
logic BTAWrongE, RASPredPCWrongE;
|
||||
// performance counters
|
||||
// 1. class (class wrong / minstret) (IClassWrongM / csr) // Correct now
|
||||
// 2. target btb (btb target wrong / class[0,1,3]) (btb target wrong / (br + j + jal)
|
||||
@ -207,14 +207,14 @@ module bpred (
|
||||
// could be wrong or the fall through address selected for branch predict not taken.
|
||||
// By pipelining the BTB's PC and RAS address through the pipeline we can measure the accuracy of
|
||||
// both without the above inaccuracies.
|
||||
// **** use BTAWrongM from BTB.
|
||||
assign BTBPredPCWrongE = (BTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
|
||||
// **** use BPBTAWrongM from BTB.
|
||||
assign BTAWrongE = (BPBTAE != IEUAdrE) & (BranchE | JumpE & ~ReturnE) & PCSrcE;
|
||||
assign RASPredPCWrongE = (RASPCE != IEUAdrE) & ReturnE & PCSrcE;
|
||||
|
||||
flopenrc #(`XLEN) RASTargetDReg(clk, reset, FlushD, ~StallD, RASPCF, RASPCD);
|
||||
flopenrc #(`XLEN) RASTargetEReg(clk, reset, FlushE, ~StallE, RASPCD, RASPCE);
|
||||
flopenrc #(3) BPPredWrongRegM(clk, reset, FlushM, ~StallM,
|
||||
{BPDirPredWrongE, BTBPredPCWrongE, RASPredPCWrongE},
|
||||
{BPDirPredWrongE, BTAWrongE, RASPredPCWrongE},
|
||||
{BPDirPredWrongM, BTAWrongM, RASPredPCWrongM});
|
||||
|
||||
end else begin
|
||||
|
@ -35,9 +35,9 @@ module btb #(parameter Depth = 10 ) (
|
||||
input logic reset,
|
||||
input logic StallF, StallD, StallE, StallM, StallW, FlushD, FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] PCNextF, PCF, PCD, PCE, PCM,// PC at various stages
|
||||
output logic [`XLEN-1:0] BTAF, // BTB's guess at PC
|
||||
output logic [`XLEN-1:0] BTAD,
|
||||
output logic [`XLEN-1:0] BTAE,
|
||||
output logic [`XLEN-1:0] BPBTAF, // BTB's guess at PC
|
||||
output logic [`XLEN-1:0] BPBTAD,
|
||||
output logic [`XLEN-1:0] BPBTAE,
|
||||
output logic [3:0] BTBIClassF, // BTB's guess at instruction class
|
||||
// update
|
||||
input logic IClassWrongM, // BTB's instruction class guess was wrong
|
||||
@ -57,8 +57,8 @@ module btb #(parameter Depth = 10 ) (
|
||||
logic [`XLEN+3:0] TableBTBPredF;
|
||||
logic [`XLEN-1:0] IEUAdrW;
|
||||
logic [`XLEN-1:0] PCW;
|
||||
logic BTBWrongE, BTAWrongE;
|
||||
logic BTBWrongM, BTAWrongM;
|
||||
logic BTBWrongE, BPBTAWrongE;
|
||||
logic BTBWrongM, BPBTAWrongM;
|
||||
|
||||
|
||||
// hashing function for indexing the PC
|
||||
@ -84,12 +84,12 @@ module btb #(parameter Depth = 10 ) (
|
||||
assign MatchW = PCFIndex == PCWIndex;
|
||||
assign MatchX = MatchD | MatchE | MatchM | MatchW;
|
||||
|
||||
assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BTAD} :
|
||||
assign ForwardBTBPredictionF = MatchD ? {InstrClassD, BPBTAD} :
|
||||
MatchE ? {InstrClassE, IEUAdrE} :
|
||||
MatchM ? {InstrClassM, IEUAdrM} :
|
||||
{InstrClassW, IEUAdrW} ;
|
||||
|
||||
assign {BTBIClassF, BTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF};
|
||||
assign {BTBIClassF, BPBTAF} = MatchX ? ForwardBTBPredictionF : {TableBTBPredF};
|
||||
|
||||
|
||||
// An optimization may be using a PC relative address.
|
||||
@ -97,16 +97,16 @@ module btb #(parameter Depth = 10 ) (
|
||||
.clk, .ce1(~StallF | reset), .ra1(PCNextFIndex), .rd1(TableBTBPredF),
|
||||
.ce2(~StallW & ~FlushW), .wa2(PCMIndex), .wd2({InstrClassM, IEUAdrM}), .we2(BTBWrongM), .bwe2('1));
|
||||
|
||||
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BTAF, BTAD);
|
||||
flopenrc #(`XLEN) BTBD(clk, reset, FlushD, ~StallD, BPBTAF, BPBTAD);
|
||||
|
||||
// BTAE is not strickly necessary. However it is used by two parts of wally.
|
||||
// BPBTAE is not strictly necessary. However it is used by two parts of wally.
|
||||
// 1. It gates updates to the BTB when the prediction does not change. This saves power.
|
||||
// 2. BTAWrongE is used by the performance counters to track when the BTB's BTA or instruction class is wrong.
|
||||
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BTAD, BTAE);
|
||||
assign BTAWrongE = (BTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
|
||||
// 2. BPBTAWrongE is used by the performance counters to track when the BTB's BPBTA or instruction class is wrong.
|
||||
flopenrc #(`XLEN) BTBTargetEReg(clk, reset, FlushE, ~StallE, BPBTAD, BPBTAE);
|
||||
assign BPBTAWrongE = (BPBTAE != IEUAdrE) & (InstrClassE[0] | InstrClassE[1] & ~InstrClassE[2]);
|
||||
|
||||
flopenrc #(1) BTAWrongMReg(clk, reset, FlushM, ~StallM, BTAWrongE, BTAWrongM);
|
||||
assign BTBWrongM = BTAWrongM | IClassWrongM;
|
||||
flopenrc #(1) BPBTAWrongMReg(clk, reset, FlushM, ~StallM, BPBTAWrongE, BPBTAWrongM);
|
||||
assign BTBWrongM = BPBTAWrongM | IClassWrongM;
|
||||
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW);
|
||||
flopenr #(`XLEN) IEUAdrWReg(clk, reset, ~StallW, IEUAdrM, IEUAdrW);
|
||||
|
@ -233,7 +233,7 @@ module ifu (
|
||||
assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0;
|
||||
cache #(.LINELEN(`ICACHE_LINELENINBITS),
|
||||
.NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS),
|
||||
.NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
|
||||
.NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1))
|
||||
icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD),
|
||||
.FetchBuffer, .CacheBusAck(ICacheBusAck),
|
||||
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
|
||||
@ -245,7 +245,7 @@ module ifu (
|
||||
.CacheWriteData('0),
|
||||
.CacheRW(CacheRWF),
|
||||
.CacheAtomic('0), .FlushCache('0),
|
||||
.NextAdr(PCSpillNextF[11:0]),
|
||||
.NextSet(PCSpillNextF[11:0]),
|
||||
.PAdr(PCPF),
|
||||
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
|
||||
ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW)
|
||||
|
@ -264,9 +264,9 @@ module lsu (
|
||||
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW);
|
||||
|
||||
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache(
|
||||
.NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .READ_ONLY_CACHE(0)) dcache(
|
||||
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
|
||||
.FlushCache(FlushDCache), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM),
|
||||
.FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM),
|
||||
.ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
|
||||
.CacheWriteData(LSUWriteDataM), .SelHPTW,
|
||||
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
|
||||
|
@ -32,11 +32,11 @@ module mdu(
|
||||
input logic clk, reset,
|
||||
input logic StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
|
||||
input logic [2:0] Funct3E, Funct3M, // type of MDU operation
|
||||
input logic IntDivE, W64E, // Integer division/remainder, and W-type instrutions
|
||||
output logic [`XLEN-1:0] MDUResultW, // multiply/divide result
|
||||
output logic DivBusyE // busy signal to stall pipeline in Execute stage
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // inputs A and B from IEU forwarding mux output
|
||||
input logic [2:0] Funct3E, Funct3M, // type of MDU operation
|
||||
input logic IntDivE, W64E, // Integer division/remainder, and W-type instrutions
|
||||
output logic [`XLEN-1:0] MDUResultW, // multiply/divide result
|
||||
output logic DivBusyE // busy signal to stall pipeline in Execute stage
|
||||
);
|
||||
|
||||
logic [`XLEN*2-1:0] ProdM; // double-width product from mul
|
||||
|
@ -133,7 +133,7 @@ module csr #(parameter
|
||||
if (InterruptM) NextFaultMtvalM = 0;
|
||||
else case (CauseM)
|
||||
12, 1, 3: NextFaultMtvalM = PCM; // Instruction page/access faults, breakpoint
|
||||
2: NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrM}; // Illegal instruction fault
|
||||
2: NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrM}; // Illegal instruction fault // *** this should probably set to the uncompressed instruction
|
||||
0, 4, 6, 13, 15, 5, 7: NextFaultMtvalM = IEUAdrM; // Instruction misaligned, Load/Store Misaligned/page/access faults
|
||||
default: NextFaultMtvalM = 0; // Ecall, interrupts
|
||||
endcase
|
||||
|
@ -69,6 +69,7 @@ logic [3:0] dummy;
|
||||
|
||||
logic DCacheFlushDone, DCacheFlushStart;
|
||||
logic riscofTest;
|
||||
logic StartSample, EndSample;
|
||||
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
|
||||
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
|
||||
@ -412,8 +413,7 @@ logic [3:0] dummy;
|
||||
integer HPMCindex;
|
||||
logic StartSampleFirst;
|
||||
logic StartSampleDelayed;
|
||||
logic StartSample;
|
||||
logic EndSample, EndSampleFirst, EndSampleDelayed;
|
||||
logic EndSampleFirst, EndSampleDelayed;
|
||||
logic [`XLEN-1:0] InitialHPMCOUNTERH[`COUNTERS-1:0];
|
||||
|
||||
string HPMCnames[] = '{"Mcycle",
|
||||
@ -454,6 +454,17 @@ logic [3:0] dummy;
|
||||
flopr #(1) EndSampleReg(clk, reset, EndSampleFirst, EndSampleDelayed);
|
||||
assign EndSample = EndSampleFirst & ~ EndSampleDelayed;
|
||||
|
||||
end else if(TEST == "coremark") begin
|
||||
// coremark: sampling starts when execution first enters start_time
|
||||
// coremark: sampling ends when execution first enters stop_time.
|
||||
assign StartSampleFirst = FunctionName.FunctionName.FunctionName == "start_time";
|
||||
flopr #(1) StartSampleReg(clk, reset, StartSampleFirst, StartSampleDelayed);
|
||||
assign StartSample = StartSampleFirst & ~ StartSampleDelayed;
|
||||
|
||||
assign EndSampleFirst = FunctionName.FunctionName.FunctionName == "stop_time";
|
||||
flopr #(1) EndSampleReg(clk, reset, EndSampleFirst, EndSampleDelayed);
|
||||
assign EndSample = EndSampleFirst & ~ EndSampleDelayed;
|
||||
|
||||
end else begin
|
||||
// default start condition is reset
|
||||
// default end condition is end of test (DCacheFlushDone)
|
||||
@ -540,15 +551,23 @@ logic [3:0] dummy;
|
||||
string direction;
|
||||
int file;
|
||||
logic PCSrcM;
|
||||
string LogFile;
|
||||
logic resetD, resetEdge;
|
||||
flopenrc #(1) PCSrcMReg(clk, reset, dut.core.FlushM, ~dut.core.StallM, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PCSrcE, PCSrcM);
|
||||
flop #(1) ResetDReg(clk, reset, resetD);
|
||||
assign resetEdge = ~reset & resetD;
|
||||
initial begin
|
||||
file = $fopen("branch.log", "w");
|
||||
LogFile = $psprintf("branch_%s%0d.log", `BPRED_TYPE, `BPRED_SIZE);
|
||||
file = $fopen(LogFile, "w");
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if(resetEdge) $fwrite(file, "TRAIN\n");
|
||||
if(StartSample) $fwrite(file, "BEGIN %s\n", memfilename);
|
||||
if(dut.core.ifu.InstrClassM[0] & ~dut.core.StallW & ~dut.core.FlushW & dut.core.InstrValidM) begin
|
||||
direction = PCSrcM ? "t" : "n";
|
||||
$fwrite(file, "%h %s\n", dut.core.PCM, direction);
|
||||
end
|
||||
if(EndSample) $fwrite(file, "END %s\n", memfilename);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -138,11 +138,6 @@ module testbench;
|
||||
.CMP_CSR (1)
|
||||
) idv_trace2api(rvvi);
|
||||
|
||||
int PRIV_RWX = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE | RVVI_MEMORY_PRIVILEGE_EXEC;
|
||||
int PRIV_RW = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_WRITE;
|
||||
int PRIV_RX = RVVI_MEMORY_PRIVILEGE_READ | RVVI_MEMORY_PRIVILEGE_EXEC;
|
||||
int PRIV_X = RVVI_MEMORY_PRIVILEGE_EXEC;
|
||||
|
||||
initial begin
|
||||
|
||||
MAX_ERRS = 3;
|
||||
@ -173,37 +168,24 @@ module testbench;
|
||||
void'(rvviRefCsrSetVolatile(0, 32'h344)); // MIP
|
||||
void'(rvviRefCsrSetVolatile(0, 32'h144)); // SIP
|
||||
|
||||
/*
|
||||
// Memory lo, hi, priv (RVVI_MEMORY_PRIVILEGE_{READ,WRITE,EXEC})
|
||||
void'(rvviRefMemorySetPrivilege(56'h0, 56'h7fffffffff, 0));
|
||||
if (`BOOTROM_SUPPORTED)
|
||||
void'(rvviRefMemorySetPrivilege(`BOOTROM_BASE, (`BOOTROM_BASE + `BOOTROM_RANGE), PRIV_RX));
|
||||
if (`UNCORE_RAM_SUPPORTED)
|
||||
void'(rvviRefMemorySetPrivilege(`UNCORE_RAM_BASE, (`UNCORE_RAM_BASE + `UNCORE_RAM_RANGE), PRIV_RWX));
|
||||
if (`EXT_MEM_SUPPORTED)
|
||||
void'(rvviRefMemorySetPrivilege(`EXT_MEM_BASE, (`EXT_MEM_BASE + `EXT_MEM_RANGE), PRIV_RWX));
|
||||
|
||||
// Privileges for PMA are set in the imperas.ic
|
||||
// volatile (IO) regions are defined here
|
||||
// only real ROM/RAM areas are BOOTROM and UNCORE_RAM
|
||||
if (`CLINT_SUPPORTED) begin
|
||||
void'(rvviRefMemorySetPrivilege(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE), PRIV_RW));
|
||||
void'(rvviRefMemorySetVolatile(`CLINT_BASE, (`CLINT_BASE + `CLINT_RANGE)));
|
||||
end
|
||||
if (`GPIO_SUPPORTED) begin
|
||||
void'(rvviRefMemorySetPrivilege(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE), PRIV_RW));
|
||||
void'(rvviRefMemorySetVolatile(`GPIO_BASE, (`GPIO_BASE + `GPIO_RANGE)));
|
||||
end
|
||||
if (`UART_SUPPORTED) begin
|
||||
void'(rvviRefMemorySetPrivilege(`UART_BASE, (`UART_BASE + `UART_RANGE), PRIV_RW));
|
||||
void'(rvviRefMemorySetVolatile(`UART_BASE, (`UART_BASE + `UART_RANGE)));
|
||||
end
|
||||
if (`PLIC_SUPPORTED) begin
|
||||
void'(rvviRefMemorySetPrivilege(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE), PRIV_RW));
|
||||
void'(rvviRefMemorySetVolatile(`PLIC_BASE, (`PLIC_BASE + `PLIC_RANGE)));
|
||||
end
|
||||
if (`SDC_SUPPORTED) begin
|
||||
void'(rvviRefMemorySetPrivilege(`SDC_BASE, (`SDC_BASE + `SDC_RANGE), PRIV_RW));
|
||||
void'(rvviRefMemorySetVolatile(`SDC_BASE, (`SDC_BASE + `SDC_RANGE)));
|
||||
end
|
||||
*/
|
||||
|
||||
if(`XLEN==32) begin
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH
|
||||
|
Loading…
Reference in New Issue
Block a user