From 135e2753ac205184a6a15ee42709b650ebf1c1ba Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Apr 2024 10:32:43 -0700 Subject: [PATCH 01/11] Fixed Sail compilation --- bin/wally-tool-chain-install.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/wally-tool-chain-install.sh b/bin/wally-tool-chain-install.sh index 1810747ec..336ce33e4 100755 --- a/bin/wally-tool-chain-install.sh +++ b/bin/wally-tool-chain-install.sh @@ -176,6 +176,10 @@ git clone https://github.com/riscv/sail-riscv.git cd sail-riscv # For now, use checkout that is stable for Wally #git checkout 72b2516d10d472ac77482fd959a9401ce3487f60 # not new enough for Zicboz? +export OPAMCLI=2.0 # Sail is not compatible with opam 2.1 as of 4/16/24 +# It is faster to just build c_emulator/riscv_sim_RV* than to build all of Sail +#make -j ${NUM_THREADS} +#ARCH=RV32 make -j ${NUM_THREADS} make -j ${NUM_THREADS} c_emulator/riscv_sim_RV64 ARCH=RV32 make -j ${NUM_THREADS} c_emulator/riscv_sim_RV32 sudo ln -sf $RISCV/sail-riscv/c_emulator/riscv_sim_RV64 /usr/bin/riscv_sim_RV64 From 21a9ff248a2849d539adc3ead5e688f33e926bdf Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 16 Apr 2024 15:37:04 -0500 Subject: [PATCH 02/11] Fixed regression-wally so it actually produces covereage reports. --- bin/regression-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/regression-wally b/bin/regression-wally index bdb58fea0..29471cb4f 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -208,7 +208,7 @@ def addTests(tests, sim): gs = test[3] else: gs = "All tests ran without failures" - cmdPrefix="wsim --sim " + sim + " " + config + cmdPrefix="wsim --sim " + sim + " " + coverStr + " " + config for t in suites: tc = TestCase( name=t, From dd3460c1a9f07394bfc2ef0906396f4df6ae4a78 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 16 Apr 2024 15:44:42 -0500 Subject: [PATCH 03/11] Fixed makefile and regression-wally so that code coverage now works. --- bin/regression-wally | 4 ++-- sim/Makefile | 42 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/bin/regression-wally b/bin/regression-wally index 29471cb4f..a73f937a5 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -381,7 +381,7 @@ def main(): # Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage. # Also it is slow to run. # configs.append(getBuildrootTC(boot=False)) - os.system('rm -f cov/*.ucdb') + os.system('rm -f questa/cov/*.ucdb') elif '--nightly' in sys.argv: TIMEOUT_DUR = 60*1440 # 1 day #configs.append(getBuildrootTC(boot=False)) @@ -407,7 +407,7 @@ def main(): # Coverage report if coverage: - os.system('make coverage') + os.system('make QuestaCoverage') # Count the number of failures if num_fail: print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail) diff --git a/sim/Makefile b/sim/Makefile index 0cae5053e..09d417124 100644 --- a/sim/Makefile +++ b/sim/Makefile @@ -17,28 +17,28 @@ all: riscoftests memfiles coveragetests deriv wally-riscv-arch-test: wallyriscoftests memfiles -coverage: cov/rv64gc_arch64i.ucdb +QuestaCoverage: questa/cov/rv64gc_arch64i.ucdb #iter-elf.bash --cover --search ../tests/coverage - vcover merge -out cov/cov.ucdb cov/rv64gc_arch64i.ucdb cov/rv64gc*.ucdb -logfile cov/log -# vcover merge -out cov/cov.ucdb cov/rv64gc_arch64i.ucdb cov/rv64gc*.ucdb cov/buildroot_buildroot.ucdb riscv.ucdb -logfile cov/log - vcover report -details cov/cov.ucdb > cov/rv64gc_coverage_details.rpt - vcover report cov/cov.ucdb -details -instance=/core/ebu. > cov/rv64gc_coverage_ebu.rpt - vcover report cov/cov.ucdb -details -instance=/core/priv. > cov/rv64gc_coverage_priv.rpt - vcover report cov/cov.ucdb -details -instance=/core/ifu. > cov/rv64gc_coverage_ifu.rpt - vcover report cov/cov.ucdb -details -instance=/core/lsu. > cov/rv64gc_coverage_lsu.rpt - vcover report cov/cov.ucdb -details -instance=/core/fpu. > cov/rv64gc_coverage_fpu.rpt - vcover report cov/cov.ucdb -details -instance=/core/ieu. > cov/rv64gc_coverage_ieu.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/ebu. > cov/rv64gc_uncovered_ebu.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/priv. > cov/rv64gc_uncovered_priv.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/ifu. > cov/rv64gc_uncovered_ifu.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/lsu. > cov/rv64gc_uncovered_lsu.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/fpu. > cov/rv64gc_uncovered_fpu.rpt - vcover report cov/cov.ucdb -below 100 -details -instance=/core/ieu. > cov/rv64gc_uncovered_ieu.rpt - vcover report -hierarchical cov/cov.ucdb > cov/rv64gc_coverage_hierarchical.rpt - vcover report -below 100 -hierarchical cov/cov.ucdb > cov/rv64gc_uncovered_hierarchical.rpt -# vcover report -below 100 cov/cov.ucdb > cov/rv64gc_coverage.rpt -# vcover report -recursive cov/cov.ucdb > cov/rv64gc_recursive.rpt - vcover report -details -threshH 100 -html cov/cov.ucdb + vcover merge -out questa/cov/cov.ucdb questa/cov/rv64gc_arch64i.ucdb questa/cov/rv64gc*.ucdb -logfile questa/cov/log +# vcover merge -out questa/cov/cov.ucdb questa/cov/rv64gc_arch64i.ucdb questa/cov/rv64gc*.ucdb questa/cov/buildroot_buildroot.ucdb riscv.ucdb -logfile questa/cov/log + vcover report -details questa/cov/cov.ucdb > questa/cov/rv64gc_coverage_details.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/ebu. > questa/cov/rv64gc_coverage_ebu.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/priv. > questa/cov/rv64gc_coverage_priv.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/ifu. > questa/cov/rv64gc_coverage_ifu.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/lsu. > questa/cov/rv64gc_coverage_lsu.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/fpu. > questa/cov/rv64gc_coverage_fpu.rpt + vcover report questa/cov/cov.ucdb -details -instance=/core/ieu. > questa/cov/rv64gc_coverage_ieu.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/ebu. > questa/cov/rv64gc_uncovered_ebu.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/priv. > questa/cov/rv64gc_uncovered_priv.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/ifu. > questa/cov/rv64gc_uncovered_ifu.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/lsu. > questa/cov/rv64gc_uncovered_lsu.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/fpu. > questa/cov/rv64gc_uncovered_fpu.rpt + vcover report questa/cov/cov.ucdb -below 100 -details -instance=/core/ieu. > questa/cov/rv64gc_uncovered_ieu.rpt + vcover report -hierarchical questa/cov/cov.ucdb > questa/cov/rv64gc_coverage_hierarchical.rpt + vcover report -below 100 -hierarchical questa/cov/cov.ucdb > questa/cov/rv64gc_uncovered_hierarchical.rpt +# vcover report -below 100 questa/cov/cov.ucdb > questa/cov/rv64gc_coverage.rpt +# vcover report -recursive questa/cov/cov.ucdb > questa/cov/rv64gc_recursive.rpt + vcover report -details -threshH 100 -html questa/cov/cov.ucdb allclean: clean all From db330b35b233c846fdf3118782024dbf4d1c8ad9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Apr 2024 20:57:49 -0700 Subject: [PATCH 04/11] Removed unnecessary muxes from shiftcorrection; changed flag to --nightly in lint-wally --- bin/lint-wally | 2 +- config/shared/config-shared.vh | 13 +++++++--- src/fpu/postproc/shiftcorrection.sv | 38 +++++++++++++++-------------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/bin/lint-wally b/bin/lint-wally index 4187b0429..08d189ec7 100755 --- a/bin/lint-wally +++ b/bin/lint-wally @@ -11,7 +11,7 @@ GREEN='\033[0;32m' NC='\033[0m' # No Color fails=0 -if [ "$1" == "-nightly" ]; then +if [ "$1" == "--nightly" ]; then configs=(rv32e rv64gc rv32gc rv32imc rv32i rv64i) # fdqh_rv64gc derivconfigs=`ls $WALLY/config/deriv` for entry in $derivconfigs diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 481247eae..25d0d8c65 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -99,7 +99,6 @@ localparam RK = LOGR*DIVCOPIES; // r*k bits // intermediate division parameters not directly used in fdivsqrt hardware localparam FPDIVMINb = NF + 2; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right -//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step. localparam DIVMINb = ((FPDIVMINb Date: Wed, 17 Apr 2024 03:15:20 -0700 Subject: [PATCH 05/11] Updated CoreMark benchmark to default to rv32 for better numbers --- benchmarks/coremark/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 9ac905950..323dd1316 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -5,7 +5,7 @@ PORT_DIR = $(CURDIR)/riscv64-baremetal cmbase= $(WALLY)/addins/coremark work_dir= $(WALLY)/benchmarks/coremark/work -XLEN ?=64 +XLEN ?=32 sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ @@ -29,7 +29,6 @@ all: $(work_dir)/coremark.bare.riscv.elf.memfile run: time wsim rv$(XLEN)gc coremark 2>&1 | tee $(work_dir)/coremark.sim.log - #(cd ../../sim && (time vsim -c -do "do wally-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv riscv64-unknown-elf-objdump -D $< > $<.elf.objdump From cd9c2e0e2b0396267c6564df46d6b338774d01d9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 03:16:01 -0700 Subject: [PATCH 06/11] Updated embench Makefile to refer to generic sim, rather than modelsim --- benchmarks/embench/Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index b4e41ca0d..e4b671c59 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -38,26 +38,27 @@ build_speedopt_size: build_sizeopt_size: $(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0" -# builds dependencies, then launches modelsim and finally runs python wrapper script to present results -sim: modelsim_build_memfile modelsim_run speed +# builds dependencies, then launches sim and finally runs python wrapper script to present results +sim: sim_build_memfile sim_run speed -# launches modelsim to simulate tests on wally -modelsim_run: - mkdir -p ../../sim/wkdir - (cd ../../sim/ && wsim rv32gc embench) - cd ../../benchmarks/embench/ +# launches sim to simulate tests on wally +sim_run: + wsim rv32gc embench + #mkdir -p ../../sim/wkdir + #(cd ../../sim/ && wsim rv32gc embench) + #cd ../../benchmarks/embench/ # builds the objdump based on the compiled c elf files objdump: find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done # build memfiles, objdump.lab and objdump.addr files -modelsim_build_memfile: objdump +sim_build_memfile: objdump find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done find $(embench_dir)/bd_*_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done # builds the tests for speed, runs them on spike and then launches python script to present results -# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim +# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and questa # you'll need to manually remove one of the two .output files, or run make clean spike: buildspeed spike_run speed From 3ea16c6057e6326dad950fed4c0aa55ea9795263 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 03:34:11 -0700 Subject: [PATCH 07/11] Removed note about store stall being depricated --- src/privileged/csrc.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/privileged/csrc.sv b/src/privileged/csrc.sv index db39f26b9..463e3368c 100644 --- a/src/privileged/csrc.sv +++ b/src/privileged/csrc.sv @@ -104,13 +104,13 @@ module csrc import cvw::*; #(parameter cvw_t P) ( assign CounterEvent[9] = RASPredPCWrongM & InstrValidNotFlushedM; // return address stack wrong address assign CounterEvent[10] = IClassWrongM & InstrValidNotFlushedM; // instruction class predictor wrong assign CounterEvent[11] = LoadStallM; // Load Stalls. don't want to suppress on flush as this only happens if flushed. - assign CounterEvent[12] = StoreStallM; // depricated Store Stall + assign CounterEvent[12] = StoreStallM; // Store Stall assign CounterEvent[13] = DCacheAccess; // data cache access assign CounterEvent[14] = DCacheMiss; // data cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[15] = DCacheStallM; // d cache miss cycles + assign CounterEvent[15] = DCacheStallM; // D$ miss cycles assign CounterEvent[16] = ICacheAccess; // instruction cache access assign CounterEvent[17] = ICacheMiss; // instruction cache miss. Miss asserted 1 cycle at start of cache miss - assign CounterEvent[18] = ICacheStallF; // i cache miss cycles + assign CounterEvent[18] = ICacheStallF; // I$ miss cycles assign CounterEvent[19] = CSRWriteM & InstrValidNotFlushedM; // CSR writes assign CounterEvent[20] = InvalidateICacheM & InstrValidNotFlushedM; // fence.i assign CounterEvent[21] = sfencevmaM & InstrValidNotFlushedM; // sfence.vma From 5fdf8dfbecd2d728a4f5833b12847adf67b0bb93 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 04:01:25 -0700 Subject: [PATCH 08/11] Switched back to unsigned ints for RV32 CoreMark per spec. CM/MHz rises from 3.35 to 3.36 --- benchmarks/coremark/Makefile | 2 +- benchmarks/coremark/riscv64-baremetal/core_portme.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 323dd1316..52ffbb05e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -12,7 +12,7 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) #ARCH := rv$(XLEN)gc_zba_zbb_zbc -ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc +ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbs #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 4f5efc1d8..8db43d20a 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -109,11 +109,11 @@ typedef unsigned short ee_u16; typedef signed int ee_s32; typedef double ee_f32; typedef unsigned char ee_u8; -//typedef unsigned int ee_u32; -typedef signed int ee_u32; // replaced with signed to improve performance per https://github.com/sifive/benchmark-coremark/blob/master/linux64/core_portme.h#L102 #if (XLEN==64) + typedef signed int ee_u32; // replaced with signed to improve performance by avoiding zero extension in RV64 per https://github.com/sifive/benchmark-coremark/blob/master/linux64/core_portme.h#L102 typedef unsigned long long ee_ptr_int; #else + typedef unsigned int ee_u32; typedef ee_u32 ee_ptr_int; #endif typedef size_t ee_size_t; From 45c32bbcdf5c8b7d9bbe9da2845e887d036e5fc0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 04:02:06 -0700 Subject: [PATCH 09/11] Fixed zbc to zbs to use full bit manipulation instructions in CoreMark --- benchmarks/coremark/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 52ffbb05e..7c8cc9b8b 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,7 +11,6 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -#ARCH := rv$(XLEN)gc_zba_zbb_zbc ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbs #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr From c11daf43f069eabbfe70cbc30d4b2c1671ee3ded Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 04:02:58 -0700 Subject: [PATCH 10/11] removed extranious iteration flag from makefile --- benchmarks/coremark/riscv64-baremetal/core_portme.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.mak b/benchmarks/coremark/riscv64-baremetal/core_portme.mak index 27e31b859..e07196061 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.mak +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.mak @@ -107,7 +107,7 @@ port_prebuild: $(PGO_STAGE) .PHONY: build_pgo_gcc build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 + $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" gen_pgo_data REBUILD=1 # Target: port_postbuild # Generate any files that are needed after actual build end. From 7abf98cb4dd83ce48ff920af6c050b63d8173c1d Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Apr 2024 04:03:48 -0700 Subject: [PATCH 11/11] Reordered coremark sweep to match text --- benchmarks/coremark/coremark_sweep.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/coremark/coremark_sweep.py b/benchmarks/coremark/coremark_sweep.py index 527f5f44f..555c51401 100755 --- a/benchmarks/coremark/coremark_sweep.py +++ b/benchmarks/coremark/coremark_sweep.py @@ -34,18 +34,18 @@ import re import csv # list of architectures to run. arch_list = [ - "rv32gc_zba_zbb_zbc", + "rv32i_zicsr", + "rv32im_zicsr", + "rv32imc_zicsr", "rv32im_zicsr_zba_zbb_zbc", "rv32gc", - "rv32imc_zicsr", - "rv32im_zicsr", - "rv32i_zicsr", - "rv64gc_zba_zbb_zbc", + "rv32gc_zba_zbb_zbc", + "rv64i_zicsr", + "rv64im_zicsr", + "rv64imc_zicsr", "rv64im_zicsr_zba_zbb_zbc", "rv64gc", - "rv64imc_zicsr", - "rv64im_zicsr", - "rv64i_zicsr" + "rv64gc_zba_zbb_zbc" ] str="32" # Define regular expressions to match the desired fields