Merge pull request #936 from kevindkim723/divremsqrtport

adding divremsqrt paper files
This commit is contained in:
David Harris 2024-08-29 15:01:37 -07:00 committed by GitHub
commit 3a772416df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
43 changed files with 6539 additions and 495 deletions

View File

@ -1,3 +1,49 @@
# divremsqrt
This branch contains the relevant hardware and test/synthesis flows for cvw's unified integer/fp divide/sqrt recurrence unit. The recurrence unit can be generated for a variety of configurations, which span flavors of radix = {2,4}, floating-point precision = {float,double,quad}, integer width = {unsupported,32,64} and divider copies = {1,2,4,8}.
The fpu postprocessor on cvw handles inputs not only from the div/sqrt unit, but also the fma and convert units. This branch's drsu unit contains a postprocessor with logic only relevant to division/sqrt.
# file hierarchy
The RTL files for the divider can be found under `cvw/src/fpu`
The majority of divider modules are found in `cvw/src/fpu/divremsqrt`, which also borrows some modules from `cvw/src/fpu/fdivsqrt`
divremsqrt/drsu describes the top-level unit for the divider, taking in unpacked floating point signals, including Xs, Xm, Xe, Ys, Ym, Ye.
drsu first feeds signals to `divremsqrt/divremsqrt`, which contains the preprocessor, iteration units, fsm, and postprocessing logic. The postprocessor in `divremsqrt/divremsqrt` also contains all integer postprocessing logic. Outputs from `divremsqrt/divremsqrt` are then sent to `divremsqrt/divremsqrtpostprocess`, which handles rounding and flags.
# verification flow
drsu is verified with the risc-v arch test Berkeley SoftFloat floating point suite of test vectors for floating point square-root and division. In order to run the top-level regression script, run `regression-wally-intdiv -intdiv`
The top-level regression python script is found accordingly in `cvw/bin/regression-wally-intdiv`. The testbench is found in `cvw/testbench/testbench_fp`, which runs drsu against testvectors. Batches of testvectors are stored within `cvw/testbench/tests-fp.vh`, and the raw binary test vectors are read from `tests/fp/vectors`
Regression log files can be found in `cvw/sim/questa/logs` after running `regression-wally-intdiv -intdiv`. Files are named with `{precision}_ieee_div_{R}_{K}_{integer}_rv{XLEN}gc_{TESTNAME}.log`
* precision denotes the floating-point precision types supported by the divider: f, fd, fdq, fdqh
* R denotes the radix of the divider: 2,4
* K denotes the number of divider copies in the unit: 1,2,4,8
* integer denotes whether integer division/remainder is supported on the divider: i
* XLEN denotes the width of integers: 32, 64 (this only matters if integer is supported on the divider)
* TESTNAME denotes which tests are being run:
* fdivremsqrt: runs fdiv, fsqrt, intdiv, intrem
* fdiv: runs fdiv
* fsqrt: runs fsqrt
# synthesis flow
To run synthesis results for all flavors of the recurrence unit, go to `cvw/synthDC/scripts` and run `python3 synthdrsu.py`. This will execute a python script that runs the installed version of Synopsys Design Compiler on divider permutations for target frequencies of 5GHz and 100MHz. To then pipe area, delay and energy results to a CSV, run `./writeCSV.sh`. Results can then be viewed in `fp-synthresults_reordered.csv` in a format similar to the one presented in the paper.
# start-up steps
1) `git clone --recurse-submodules https://github.com/openhwgroup/cvw.git`
2) `cd cvw`
3) `git checkout divremsqrt`
4) `source ./setup.sh`
5) `make`
6) `/sim/regression-wally -intdiv`
# core-v-wally
Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, B, C, D, F, M, Q, and Zk* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO). Wally is written in SystemVerilog. It passes the [RISC-V Arch Tests](https://github.com/riscv-non-isa/riscv-arch-test) and boots Linux on an FPGA. Configurations range from a minimal RV32E core to a fully featured RV64GC application processor.

View File

@ -371,6 +371,7 @@ args = parser.parse_args()
if (args.nightly):
nightMode = "--nightly";
sims = ["questa", "verilator", "vcs"] # exercise all simulators; can omit a sim if no license is available
# sims = ["questa", "verilator"] # exercise all simulators; can omit a sim if no license is available
else:
nightMode = ""
sims = [defaultsim]
@ -512,10 +513,12 @@ def main():
elif args.fcov:
TIMEOUT_DUR = 1*60
os.system('rm -f questa/fcov_ucdb/* questa/fcov_logs/* questa/fcov/*')
elif args.nightly:
elif args.buildroot:
TIMEOUT_DUR = 60*1440 # 1 day
elif args.testfloat:
TIMEOUT_DUR = 30*60 # seconds
elif args.nightly:
TIMEOUT_DUR = 30*60 # seconds
else:
TIMEOUT_DUR = 10*60 # seconds

577
bin/regression-wally-intdiv Executable file
View File

@ -0,0 +1,577 @@
#!/usr/bin/python3
##################################
#
# regression-wally
# David_Harris@Hmc.edu 25 January 2021
# Modified by Jarred Allen <jaallen@g.hmc.edu>
#
# Run a regression with multiple configurations in parallel and exit with
# non-zero status code if an error happened, as well as printing human-readable
# output.
#
##################################
import sys,os,shutil
import multiprocessing
class bcolors:
    """Terminal escape sequences used to colorize the regression output."""

    # Reset sequence; emitted after a colored message.
    ENDC = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Status colors.
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Additional colors.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
from collections import namedtuple
# Root of the Wally tree, taken from the environment; every path used by this
# script is built from it.
WALLY = os.environ.get('WALLY')
if WALLY is None:
    # Without this check the concatenation below fails with an opaque
    # "unsupported operand type(s) for +: 'NoneType' and 'str'".
    sys.exit("Error: the WALLY environment variable is not set")
regressionDir = WALLY + '/sim'
os.chdir(regressionDir)

# Command-line switches (plain substring checks against sys.argv).
coverage = '-coverage' in sys.argv
fp = '-fp' in sys.argv
nightly = '-nightly' in sys.argv
softfloat = '-softfloat' in sys.argv
intdiv = '-intdiv' in sys.argv

TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
# name:     the name of this test configuration (used in printing human-readable
#           output and picking logfile names)
# variant:  the configuration the test runs on (printed with the name and
#           used together with it to form the logfile name)
# cmd:      the command to run to test (should include the logfile as '{}', and
#           the command needs to write to that file)
# grepstr:  the string to grep through the log file for. The test succeeds iff
#           grep finds that string in the logfile (is used by grep, so it may
#           be any pattern grep accepts, see `man 1 grep` for more info).

# edit this list to add more test cases
if nightly:
    nightMode = "-nightly"
    configs = []
else:
    nightMode = ""
    configs = [
        TestCase(
            name="lints",
            variant="all",
            cmd="./lint-wally " + nightMode + " | tee {}",
            grepstr="lints run with no errors or warnings"
        )
    ]
def getBuildrootTC(boot):
    """Return the TestCase describing a buildroot simulation.

    boot: when true, build the "buildrootboot" case, which greps the log for
          the login prompt; otherwise build the "buildroot" case, which runs
          a fixed number of instructions (with or without coverage, depending
          on the module-level `coverage` flag) and greps for the instruction
          count.
    """
    instr_limit = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
    max_expected = 246000000 # *** TODO: replace this with a search for the login prompt.

    if boot:
        return TestCase(
            "buildrootboot",
            variant="rv64gc",
            cmd="vsim > {} -c <<!\ndo wally.do buildroot buildroot-no-trace $RISCV 0 1 0\n!",
            grepstr="WallyHostname login:",
        )

    limit_text = str(instr_limit)
    if coverage:
        print( "buildroot coverage")
        sim_cmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot $RISCV " + limit_text + " 1 0 -coverage\n!"
    else:
        print( "buildroot no coverage")
        sim_cmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot configOptions -GINSTR_LIMIT=" + limit_text + " \n!"
    return TestCase(
        "buildroot",
        variant="rv64gc",
        cmd=sim_cmd,
        grepstr=limit_text + " instructions",
    )
tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
tests64i = ["arch64i"]
for test in tests64i:
tc = TestCase(
name=test,
variant="rv64i",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64i "+test+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32f_divsqrt", "arch32d_divsqrt",
"arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zicond",
"arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zfh", "arch32zfh_fma",
"arch32zfh_divsqrt", "arch32zfaf", "wally32a", "wally32priv", "wally32periph",
"arch32zbkb", "arch32zbkc", "arch32zbkx", "arch32zknd", "arch32zkne", "arch32zknh"] # "arch32zbc", "arch32zfad",
#tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zicboz", "arch32zcb", "wally32a", "wally32priv", "wally32periph"]
for test in tests32gc:
tc = TestCase(
name=test,
variant="rv32gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc "+test+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
tests32imcimperas = ["imperas32i", "imperas32c"] # unused
tests32imc = ["arch32i", "arch32c", "arch32m", "wally32periph"]
for test in tests32imc:
tc = TestCase(
name=test,
variant="rv32imc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32imc "+test+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
tests32i = ["arch32i"]
for test in tests32i:
tc = TestCase(
name=test,
variant="rv32i",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32i "+test+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
tests32e = ["arch32e"]
for test in tests32e:
tc = TestCase(
name=test,
variant="rv32e",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32e "+test+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64f_divsqrt", "arch64d_divsqrt", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "arch64zfh", "arch64zfh_divsqrt", "arch64zfh_fma", "arch64zfaf", "arch64zfad", "arch64zbkb", "arch64zbkc", "arch64zbkx", "arch64zknd", "arch64zkne", "arch64zknh",
"arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "arch64zicond", "wally64a", "wally64periph", "wally64priv"] # add arch64zfh_fma when available; arch64zicobz, arch64zcb when working
#tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
# "arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "wally64a", "wally64periph", "wally64priv", "arch64zicboz", "arch64zcb"]
if (coverage): # delete all but 64gc tests when running coverage
configs = []
tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c", "arch64m",
"arch64zifencei", "arch64zicond", "arch64a", "wally64a", "wally64periph", "wally64priv",
"arch64zba", "arch64zbb", "arch64zbc", "arch64zbs"] # add when working: "arch64zcb", "arch64zicboz"
if (fp):
tests64gc.append("arch64f")
tests64gc.append("arch64d")
tests64gc.append("arch64zfh")
tests64gc.append("arch64f_fma")
tests64gc.append("arch64d_fma")
tests64gc.append("arch64zfh_fma")
tests64gc.append("arch64f_divsqrt")
tests64gc.append("arch64d_divsqrt")
tests64gc.append("arch64zfh_divsqrt")
tests64gc.append("arch64zfaf")
tests64gc.append("arch64zfad")
coverStr = '-coverage'
else:
coverStr = ''
for test in tests64gc:
tc = TestCase(
name=test,
variant="rv64gc",
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
grepstr="All tests ran without failures")
configs.append(tc)
# run derivative configurations if requested
if (nightly):
derivconfigtests = [
["tlb2_rv32gc", ["wally32priv"]],
["tlb16_rv32gc", ["wally32priv"]],
["tlb2_rv64gc", ["wally64priv"]],
["tlb16_rv64gc", ["wally64priv"]],
["way_1_4096_512_rv32gc", ["arch32i"]],
["way_2_4096_512_rv32gc", ["arch32i"]],
["way_8_4096_512_rv32gc", ["arch32i"]],
["way_4_2048_512_rv32gc", ["arch32i"]],
["way_4_4096_256_rv32gc", ["arch32i"]],
["way_1_4096_512_rv64gc", ["arch64i"]],
["way_2_4096_512_rv64gc", ["arch64i"]],
["way_8_4096_512_rv64gc", ["arch64i"]],
["way_4_2048_512_rv64gc", ["arch64i"]],
["way_4_4096_256_rv64gc", ["arch64i"]],
["way_4_4096_1024_rv64gc", ["arch64i"]],
["ram_0_0_rv64gc", ["ahb64"]],
["ram_1_0_rv64gc", ["ahb64"]],
["ram_1_1_rv64gc", ["ahb64"]],
["ram_2_0_rv64gc", ["ahb64"]],
["ram_2_1_rv64gc", ["ahb64"]],
["noicache_rv32gc", ["ahb32"]],
# cacheless designs will not work until DTIM supports FLEN > XLEN
# ["nodcache_rv32gc", ["ahb32"]],
# ["nocache_rv32gc", ["ahb32"]],
["noicache_rv64gc", ["ahb64"]],
["nodcache_rv64gc", ["ahb64"]],
["nocache_rv64gc", ["ahb64"]],
### add misaligned tests
["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
### branch predictor simulation
# ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# # btb
# ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# # ras
# ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
# enable floating-point tests when lint is fixed
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
]
for test in derivconfigtests:
config = test[0];
tests = test[1];
if(len(test) >= 4 and test[2] == "configOptions"):
configOptions = test[3]
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
else:
configOptions = ""
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
for t in tests:
tc = TestCase(
name=t,
variant=config,
cmd=cmdPrefix+" "+t+" configOptions "+configOptions+"\n!",
grepstr="All tests ran without failures")
configs.append(tc)
# softfloat tests
if (softfloat):
testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
configs = []
softfloatconfigs = [
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
]
for config in softfloatconfigs:
# div test case
divtest = TestCase(
name="div",
variant=config,
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div \n!",
grepstr="All Tests completed with 0 errors"
)
configs.insert(0,divtest)
# sqrt test case
sqrttest = TestCase(
name="sqrt",
variant=config,
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt \n!",
grepstr="All Tests completed with 0 errors"
)
#configs.append(sqrttest)
configs.insert(0,sqrttest)
# skip if divider variant config
if ("ieee" in config):
# cvtint test case
cvtinttest = TestCase(
name="cvtint",
variant=config,
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtint \n!",
grepstr="All Tests completed with 0 errors"
)
configs.append(cvtinttest)
# cvtfp test case
# WILL fail on F_only (refer to spec)
cvtfptest = TestCase(
name="cvtfp",
variant=config,
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtfp \n!",
grepstr="All Tests completed with 0 errors"
)
configs.append(cvtfptest)
# intdiv verification
if (intdiv):
configs = []
testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
# ***NOTE add to this
intdivconfigs = [
"fdh_ieee_div_2_1i_rv32gc", "fdh_ieee_div_2_1i_rv64gc", "fdh_ieee_div_2_2i_rv32gc",
"fdh_ieee_div_2_2i_rv64gc", "fdh_ieee_div_2_4i_rv32gc", "fdh_ieee_div_2_4i_rv64gc",
"fdh_ieee_div_4_1i_rv32gc", "fdh_ieee_div_4_1i_rv64gc", "fdh_ieee_div_4_2i_rv32gc",
"fdh_ieee_div_4_2i_rv64gc", "fdh_ieee_div_4_4i_rv32gc", "fdh_ieee_div_4_4i_rv64gc",
"fd_ieee_div_2_1i_rv32gc", "fd_ieee_div_2_1i_rv64gc", "fd_ieee_div_2_2i_rv32gc",
"fd_ieee_div_2_2i_rv64gc", "fd_ieee_div_2_4i_rv32gc", "fd_ieee_div_2_4i_rv64gc",
"fd_ieee_div_4_1i_rv32gc", "fd_ieee_div_4_1i_rv64gc", "fd_ieee_div_4_2i_rv32gc",
"fd_ieee_div_4_2i_rv64gc", "fd_ieee_div_4_4i_rv32gc", "fd_ieee_div_4_4i_rv64gc",
"fdqh_ieee_div_2_1i_rv32gc", "fdqh_ieee_div_2_1i_rv64gc", "fdqh_ieee_div_2_2i_rv32gc",
"fdqh_ieee_div_2_2i_rv64gc", "fdqh_ieee_div_2_4i_rv32gc", "fdqh_ieee_div_2_4i_rv64gc",
"fdqh_ieee_div_4_1i_rv32gc", "fdqh_ieee_div_4_1i_rv64gc", "fdqh_ieee_div_4_2i_rv32gc",
"fdqh_ieee_div_4_2i_rv64gc", "fdqh_ieee_div_4_4i_rv32gc", "fdqh_ieee_div_4_4i_rv64gc",
"fdq_ieee_div_2_1i_rv32gc", "fdq_ieee_div_2_1i_rv64gc", "fdq_ieee_div_2_2i_rv32gc",
"fdq_ieee_div_2_2i_rv64gc", "fdq_ieee_div_2_4i_rv32gc", "fdq_ieee_div_2_4i_rv64gc",
"fdq_ieee_div_4_1i_rv32gc", "fdq_ieee_div_4_1i_rv64gc", "fdq_ieee_div_4_2i_rv32gc",
"fdq_ieee_div_4_2i_rv64gc", "fdq_ieee_div_4_4i_rv32gc", "fdq_ieee_div_4_4i_rv64gc",
"fh_ieee_div_2_1i_rv32gc", "fh_ieee_div_2_1i_rv64gc", "fh_ieee_div_2_2i_rv32gc",
"fh_ieee_div_2_2i_rv64gc", "fh_ieee_div_2_4i_rv32gc", "fh_ieee_div_2_4i_rv64gc",
"fh_ieee_div_4_1i_rv32gc", "fh_ieee_div_4_1i_rv64gc", "fh_ieee_div_4_2i_rv32gc",
"fh_ieee_div_4_2i_rv64gc", "fh_ieee_div_4_4i_rv32gc", "fh_ieee_div_4_4i_rv64gc",
"f_ieee_div_2_1i_rv32gc", "f_ieee_div_2_1i_rv64gc", "f_ieee_div_2_2i_rv32gc",
"f_ieee_div_2_2i_rv64gc", "f_ieee_div_2_4i_rv32gc", "f_ieee_div_2_4i_rv64gc",
"f_ieee_div_4_1i_rv32gc", "f_ieee_div_4_1i_rv64gc", "f_ieee_div_4_2i_rv32gc",
"f_ieee_div_4_2i_rv64gc", "f_ieee_div_4_4i_rv32gc", "f_ieee_div_4_4i_rv64gc",
"fd_ieee_div_2_8i_rv32gc",
"fd_ieee_div_2_8i_rv64gc",
"fdq_ieee_div_2_8i_rv64gc",
"fdq_ieee_div_2_8i_rv32gc",
"f_ieee_div_2_8i_rv64gc",
"f_ieee_div_2_8i_rv32gc"
]
nointdivconfigs = [
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
]
for config in intdivconfigs:
# fdivremsqrt test case
name = "div_drsu"
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
fdivremsqrttestcase = TestCase(
name=name,
variant=config,
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
grepstr="All Tests completed with 0 errors"
)
configs.insert(0,fdivremsqrttestcase)
for config in nointdivconfigs:
# div,sqrt test cases for no integer flavor of divider
name = "div_drsu"
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
divtestcase = TestCase(
name=name,
variant=config,
#cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div_drsu \n!",
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
grepstr="All Tests completed with 0 errors"
)
configs.insert(0,divtestcase)
name = "sqrt_drsu"
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
sqrttestcase = TestCase(
name=name,
variant=config,
#cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt_drsu \n!",
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
grepstr="All Tests completed with 0 errors"
)
configs.insert(0,sqrttestcase)
import os
from multiprocessing import Pool, TimeoutError
def search_log_for_text(text, logfile):
    """Search through the given log file for text, returning True if it is found or False if it is not.

    text:    pattern handed to `grep -e`, so it is interpreted as a grep
             pattern rather than a literal string.
    logfile: path of the file to search.

    Returns False as well when grep itself fails (for example when the log
    file does not exist) or cannot be started.
    """
    # Imported here so the function does not depend on a file-level import.
    import subprocess

    # An argument list (no shell) keeps quotes, spaces and other shell
    # metacharacters in `text` or `logfile` from breaking the command, which
    # the previous shell-interpolated string did not.
    try:
        finished = subprocess.run(
            ["grep", "-e", text, logfile],
            stdout=subprocess.DEVNULL,  # only the exit status matters
        )
    except OSError:
        # grep could not be executed; the shell-based version reported that
        # as a non-zero status, i.e. "not found".
        return False
    return finished.returncode == 0
def run_test_case(config):
    """Run the given test case, and return 0 if the test succeeds and 1 if it fails.

    config: a TestCase. Its cmd is formatted with the log file path (filling
            a '{}' placeholder if present) and executed from regressionDir;
            the test passes iff config.grepstr is then found in that log file.
    """
    testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
    # The log is named <variant>_<name>.log in the simulator's logs directory.
    logname = WALLY + "/sim/" + testfloatsim + "/logs/" + config.variant + "_" + config.name + ".log"
    cmd = config.cmd.format(logname)
    os.chdir(regressionDir)
    # The command's exit status is ignored; success is judged solely by the
    # contents of the log file.
    os.system(cmd)
    if search_log_for_text(config.grepstr, logname):
        print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name))
        return 0
    print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name))
    print(" Check %s" % logname)
    return 1
def main():
    """Run the tests and count the failures.

    Ensures the logs/ directory exists and recreates wkdir/ under
    regressionDir, selects a per-test timeout (and possibly extra buildroot
    test cases) from the command-line flags, runs every entry of `configs`
    in a process pool, and returns the number of test cases that failed or
    timed out (0 means the whole regression passed).
    """
    global configs, coverage
    os.chdir(regressionDir)
    # exist_ok covers the "already there" case without a bare except, which
    # would also have hidden KeyboardInterrupt and real errors.
    os.makedirs("logs", exist_ok=True)
    # Discard any previous wkdir; a missing directory is not an error, and a
    # partially failed removal no longer makes the re-creation raise.
    shutil.rmtree("wkdir", ignore_errors=True)
    os.makedirs("wkdir", exist_ok=True)

    if '-makeTests' in sys.argv:
        os.chdir(regressionDir)
        os.system('./make-tests.sh | tee ./logs/make-tests.log')

    if '-all' in sys.argv:
        TIMEOUT_DUR = 30*7200 # seconds
        configs.append(getBuildrootTC(boot=True))
    elif '-buildroot' in sys.argv:
        TIMEOUT_DUR = 30*7200 # seconds
        configs=[getBuildrootTC(boot=True)]
    elif '-coverage' in sys.argv:
        TIMEOUT_DUR = 20*60 # seconds
        # Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage.
        # Also it is slow to run.
        # configs.append(getBuildrootTC(boot=False))
        os.system('rm -f cov/*.ucdb')
    elif '-nightly' in sys.argv:
        TIMEOUT_DUR = 60*1440 # 1 day
        configs.append(getBuildrootTC(boot=False))
    elif '-softfloat' in sys.argv:
        TIMEOUT_DUR = 60*60 # seconds
    elif '-intdiv' in sys.argv:
        TIMEOUT_DUR = 60*60 # seconds
    else:
        TIMEOUT_DUR = 10*60 # seconds
        configs.append(getBuildrootTC(boot=False))

    # Scale the number of concurrent processes to the number of test cases, but
    # max out at a limited number of concurrent processes to not overwhelm the system.
    # Pool rejects a size of 0, so never ask for fewer than one worker.
    num_workers = max(1, min(len(configs), multiprocessing.cpu_count()))
    with Pool(processes=num_workers) as pool:
        num_fail = 0
        # A list of pairs (rather than a dict keyed by the test case) so two
        # identical TestCase entries are both awaited and counted.
        pending = [(config, pool.apply_async(run_test_case, (config,))) for config in configs]
        for (config, result) in pending:
            try:
                num_fail+=result.get(timeout=TIMEOUT_DUR)
            except TimeoutError:
                num_fail+=1
                print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR))

    # Coverage report
    if coverage:
        os.system('make coverage')
    # Count the number of failures
    if num_fail:
        print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail)
    else:
        print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
    return num_fail
if __name__ == '__main__':
exit(main())

View File

@ -28,6 +28,7 @@ parser.add_argument("--tb", "-t", help="Testbench", choices=["testbench", "testb
parser.add_argument("--gui", "-g", help="Simulate with GUI", action="store_true")
parser.add_argument("--ccov", "-c", help="Code Coverage", action="store_true")
parser.add_argument("--fcov", "-f", help="Functional Coverage, implies lockstep", action="store_true")
parser.add_argument("--fcov2", "-f2", help="Functional Coverage, implies lockstep", action="store_true")
parser.add_argument("--fcovrvvi", "-fr", help="Functional Coverage RVVI", action="store_true")
parser.add_argument("--args", "-a", help="Optional arguments passed to simulator via $value$plusargs", default="")
parser.add_argument("--vcd", "-v", help="Generate testbench.vcd", action="store_true")
@ -66,7 +67,7 @@ if(args.testsuite.endswith('.elf') and args.elf == ""): # No --elf argument; che
# Validate arguments
if (args.gui or args.ccov or args.fcov or args.fcovrvvi or args.lockstep):
if (args.gui or args.ccov or args.fcov or args.fcov2 or args.fcovrvvi or args.lockstep):
if args.sim not in ["questa", "vcs"]:
print("Option only supported for Questa and VCS")
exit(1)
@ -81,7 +82,7 @@ if (args.rvvi):
if(int(args.locksteplog) >= 1): EnableLog = 1
else: EnableLog = 0
if (args.lockstep):
prefix = "IMPERAS_TOOLS=" + WALLY + "/sim/imperas.ic"
prefix = "IMPERAS_TOOLS=" + WALLY + "/config/"+args.config+"/imperas.ic"
if(args.locksteplog != 0): ImperasPlusArgs = " +IDV_TRACE2LOG=" + str(EnableLog) + " +IDV_TRACE2LOG_AFTER=" + str(args.locksteplog)
else: ImperasPlusArgs = ""
if(args.fcov):
@ -90,6 +91,12 @@ if (args.lockstep):
else: EnableLog = 0
ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr;
suffix = ""
# NOTE(review): this else pairs only with the fcov2 test directly above it. If
# the preceding `if(args.fcov):` is a separate statement at the same level,
# running with --fcov but without --fcov2 also reaches the else branch and
# resets suffix to "--lockstep" -- confirm whether `elif` was intended.
if(args.fcov2):
    # covlog can arrive as a string from the command line, so convert it once
    # before any numeric comparison (comparing str with int raises TypeError).
    covlog_count = int(args.covlog)
    CovEnableStr = "1" if covlog_count > 0 else "0"
    EnableLog = 1 if covlog_count >= 1 else 0
    ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr
    suffix = ""
else:
    CovEnableStr = ""
    suffix = "--lockstep"
@ -104,6 +111,8 @@ if (args.ccov):
flags += " --ccov"
if (args.fcov):
flags += " --fcov"
if (args.fcov2):
flags += " --fcov2"
if (args.fcovrvvi):
    # Leading space matches the other flag appends (" --ccov", " --fcov",
    # " --fcov2") so this option does not fuse with whatever precedes it.
    flags += " --fcovrvvi"

View File

@ -950,6 +950,9 @@ D_SUPPORTED 0
ZCD_SUPPORTED 0
ZFH_SUPPORTED 0
deriv f_div_2_8_rv64gc f_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv f_div_4_1_rv64gc div_4_1_rv64gc
D_SUPPORTED 0
ZCD_SUPPORTED 0
@ -982,6 +985,9 @@ D_SUPPORTED 0
ZCD_SUPPORTED 0
ZFH_SUPPORTED 1
deriv fh_div_2_8_rv32gc fh_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fh_div_4_1_rv32gc div_4_1_rv32gc
D_SUPPORTED 0
ZCD_SUPPORTED 0
@ -1012,6 +1018,9 @@ D_SUPPORTED 0
ZCD_SUPPORTED 0
ZFH_SUPPORTED 1
deriv fh_div_2_8_rv64gc fh_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fh_div_4_1_rv64gc div_4_1_rv64gc
D_SUPPORTED 0
ZCD_SUPPORTED 0
@ -1038,6 +1047,9 @@ ZFH_SUPPORTED 0
deriv fd_div_2_4_rv32gc div_2_4_rv32gc
ZFH_SUPPORTED 0
deriv fd_div_2_8_rv32gc fd_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fd_div_4_1_rv32gc div_4_1_rv32gc
ZFH_SUPPORTED 0
@ -1056,6 +1068,9 @@ ZFH_SUPPORTED 0
deriv fd_div_2_4_rv64gc div_2_4_rv64gc
ZFH_SUPPORTED 0
deriv fd_div_2_8_rv64gc fd_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fd_div_4_1_rv64gc div_4_1_rv64gc
ZFH_SUPPORTED 0
@ -1077,6 +1092,9 @@ ZFH_SUPPORTED 1
deriv fdh_div_2_4_rv32gc div_2_4_rv32gc
ZFH_SUPPORTED 1
deriv fdh_div_2_8_rv32gc fdh_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdh_div_4_1_rv32gc div_4_1_rv32gc
ZFH_SUPPORTED 1
@ -1095,6 +1113,9 @@ ZFH_SUPPORTED 1
deriv fdh_div_2_4_rv64gc div_2_4_rv64gc
ZFH_SUPPORTED 1
deriv fdh_div_2_8_rv64gc fdh_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdh_div_4_1_rv64gc div_4_1_rv64gc
ZFH_SUPPORTED 1
@ -1118,6 +1139,9 @@ deriv fdq_div_2_4_rv32gc div_2_4_rv32gc
Q_SUPPORTED 1
ZFH_SUPPORTED 0
deriv fdq_div_2_8_rv32gc fdq_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdq_div_4_1_rv32gc div_4_1_rv32gc
Q_SUPPORTED 1
ZFH_SUPPORTED 0
@ -1142,6 +1166,9 @@ deriv fdq_div_2_4_rv64gc div_2_4_rv64gc
Q_SUPPORTED 1
ZFH_SUPPORTED 0
deriv fdq_div_2_8_rv64gc fdq_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdq_div_4_1_rv64gc div_4_1_rv64gc
Q_SUPPORTED 1
ZFH_SUPPORTED 0
@ -1168,6 +1195,9 @@ deriv fdqh_div_2_4_rv32gc div_2_4_rv32gc
Q_SUPPORTED 1
ZFH_SUPPORTED 1
deriv fdqh_div_2_8_rv32gc fdqh_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdqh_div_4_1_rv32gc div_4_1_rv32gc
Q_SUPPORTED 1
ZFH_SUPPORTED 1
@ -1192,6 +1222,9 @@ deriv fdqh_div_2_4_rv64gc div_2_4_rv64gc
Q_SUPPORTED 1
ZFH_SUPPORTED 1
deriv fdqh_div_2_8_rv64gc fdqh_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdqh_div_4_1_rv64gc div_4_1_rv64gc
Q_SUPPORTED 1
ZFH_SUPPORTED 1
@ -1215,6 +1248,9 @@ IEEE754 1
deriv f_ieee_div_2_4_rv32gc f_div_2_4_rv32gc
IEEE754 1
deriv f_ieee_div_2_8_rv32gc f_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv f_ieee_div_4_1_rv32gc f_div_4_1_rv32gc
IEEE754 1
@ -1233,6 +1269,9 @@ IEEE754 1
deriv f_ieee_div_2_4_rv64gc f_div_2_4_rv64gc
IEEE754 1
deriv f_ieee_div_2_8_rv64gc f_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv f_ieee_div_4_1_rv64gc f_div_4_1_rv64gc
IEEE754 1
@ -1252,6 +1291,9 @@ IEEE754 1
deriv fh_ieee_div_2_4_rv32gc fh_div_2_4_rv32gc
IEEE754 1
deriv fh_ieee_div_2_8_rv32gc fh_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fh_ieee_div_4_1_rv32gc fh_div_4_1_rv32gc
IEEE754 1
@ -1270,6 +1312,9 @@ IEEE754 1
deriv fh_ieee_div_2_4_rv64gc fh_div_2_4_rv64gc
IEEE754 1
deriv fh_ieee_div_2_8_rv64gc fh_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fh_ieee_div_4_1_rv64gc fh_div_4_1_rv64gc
IEEE754 1
@ -1289,6 +1334,9 @@ IEEE754 1
deriv fd_ieee_div_2_4_rv32gc fd_div_2_4_rv32gc
IEEE754 1
deriv fd_ieee_div_2_8_rv32gc fd_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fd_ieee_div_4_1_rv32gc fd_div_4_1_rv32gc
IEEE754 1
@ -1307,6 +1355,9 @@ IEEE754 1
deriv fd_ieee_div_2_4_rv64gc fd_div_2_4_rv64gc
IEEE754 1
deriv fd_ieee_div_2_8_rv64gc fd_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fd_ieee_div_4_1_rv64gc fd_div_4_1_rv64gc
IEEE754 1
@ -1327,6 +1378,9 @@ IEEE754 1
deriv fdh_ieee_div_2_4_rv32gc fdh_div_2_4_rv32gc
IEEE754 1
deriv fdh_ieee_div_2_8_rv32gc fdh_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdh_ieee_div_4_1_rv32gc fdh_div_4_1_rv32gc
IEEE754 1
@ -1345,6 +1399,9 @@ IEEE754 1
deriv fdh_ieee_div_2_4_rv64gc fdh_div_2_4_rv64gc
IEEE754 1
deriv fdh_ieee_div_2_8_rv64gc fdh_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdh_ieee_div_4_1_rv64gc fdh_div_4_1_rv64gc
IEEE754 1
@ -1364,6 +1421,9 @@ IEEE754 1
deriv fdq_ieee_div_2_4_rv32gc fdq_div_2_4_rv32gc
IEEE754 1
deriv fdq_ieee_div_2_8_rv32gc fdq_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdq_ieee_div_4_1_rv32gc fdq_div_4_1_rv32gc
IEEE754 1
@ -1382,6 +1442,9 @@ IEEE754 1
deriv fdq_ieee_div_2_4_rv64gc fdq_div_2_4_rv64gc
IEEE754 1
deriv fdq_ieee_div_2_8_rv64gc fdq_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdq_ieee_div_4_1_rv64gc fdq_div_4_1_rv64gc
IEEE754 1
@ -1402,6 +1465,9 @@ IEEE754 1
deriv fdqh_ieee_div_2_4_rv32gc fdqh_div_2_4_rv32gc
IEEE754 1
deriv fdqh_ieee_div_2_8_rv32gc fdqh_ieee_div_2_4_rv32gc
DIVCOPIES 32'd8
deriv fdqh_ieee_div_4_1_rv32gc fdqh_div_4_1_rv32gc
IEEE754 1
@ -1420,6 +1486,9 @@ IEEE754 1
deriv fdqh_ieee_div_2_4_rv64gc fdqh_div_2_4_rv64gc
IEEE754 1
deriv fdqh_ieee_div_2_8_rv64gc fdqh_ieee_div_2_4_rv64gc
DIVCOPIES 32'd8
deriv fdqh_ieee_div_4_1_rv64gc fdqh_div_4_1_rv64gc
IEEE754 1
@ -1440,6 +1509,9 @@ IDIV_ON_FPU 1
deriv f_ieee_div_2_4i_rv32gc f_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv f_ieee_div_2_8i_rv32gc f_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv f_ieee_div_4_1i_rv32gc f_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1458,6 +1530,9 @@ IDIV_ON_FPU 1
deriv f_ieee_div_2_4i_rv64gc f_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv f_ieee_div_2_8i_rv64gc f_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv f_ieee_div_4_1i_rv64gc f_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1
@ -1477,6 +1552,9 @@ IDIV_ON_FPU 1
deriv fh_ieee_div_2_4i_rv32gc fh_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv fh_ieee_div_2_8i_rv32gc fh_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv fh_ieee_div_4_1i_rv32gc fh_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1495,6 +1573,9 @@ IDIV_ON_FPU 1
deriv fh_ieee_div_2_4i_rv64gc fh_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv fh_ieee_div_2_8i_rv64gc fh_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv fh_ieee_div_4_1i_rv64gc fh_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1
@ -1515,6 +1596,9 @@ IDIV_ON_FPU 1
deriv fd_ieee_div_2_4i_rv32gc fd_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv fd_ieee_div_2_8i_rv32gc fd_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv fd_ieee_div_4_1i_rv32gc fd_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1533,6 +1617,9 @@ IDIV_ON_FPU 1
deriv fd_ieee_div_2_4i_rv64gc fd_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv fd_ieee_div_2_8i_rv64gc fd_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv fd_ieee_div_4_1i_rv64gc fd_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1
@ -1553,6 +1640,9 @@ IDIV_ON_FPU 1
deriv fdh_ieee_div_2_4i_rv32gc fdh_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv fdh_ieee_div_2_8i_rv32gc fdh_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv fdh_ieee_div_4_1i_rv32gc fdh_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1571,6 +1661,9 @@ IDIV_ON_FPU 1
deriv fdh_ieee_div_2_4i_rv64gc fdh_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv fdh_ieee_div_2_8i_rv64gc fdh_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv fdh_ieee_div_4_1i_rv64gc fdh_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1
@ -1591,6 +1684,9 @@ IDIV_ON_FPU 1
deriv fdq_ieee_div_2_4i_rv32gc fdq_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv fdq_ieee_div_2_8i_rv32gc fdq_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv fdq_ieee_div_4_1i_rv32gc fdq_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1609,6 +1705,9 @@ IDIV_ON_FPU 1
deriv fdq_ieee_div_2_4i_rv64gc fdq_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv fdq_ieee_div_2_8i_rv64gc fdq_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv fdq_ieee_div_4_1i_rv64gc fdq_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1
@ -1629,6 +1728,9 @@ IDIV_ON_FPU 1
deriv fdqh_ieee_div_2_4i_rv32gc fdqh_ieee_div_2_4_rv32gc
IDIV_ON_FPU 1
deriv fdqh_ieee_div_2_8i_rv32gc fdqh_ieee_div_2_4i_rv32gc
DIVCOPIES 32'd8
deriv fdqh_ieee_div_4_1i_rv32gc fdqh_ieee_div_4_1_rv32gc
IDIV_ON_FPU 1
@ -1647,6 +1749,9 @@ IDIV_ON_FPU 1
deriv fdqh_ieee_div_2_4i_rv64gc fdqh_ieee_div_2_4_rv64gc
IDIV_ON_FPU 1
deriv fdqh_ieee_div_2_8i_rv64gc fdqh_ieee_div_2_4i_rv64gc
DIVCOPIES 32'd8
deriv fdqh_ieee_div_4_1i_rv64gc fdqh_ieee_div_4_1_rv64gc
IDIV_ON_FPU 1

View File

@ -9,6 +9,7 @@
#--showcommands
# Core settings
--variant RV32GC # for RV32GC
--override cpu/priv_version=1.12
--override cpu/user_version=20191213
# arch
@ -38,11 +39,12 @@
--override lr_sc_grain=8 # Za64rs requires <=64; we use native word size
# 64 KiB continuous huge pages supported
--override cpu/Svpbmt=T
--override cpu/Svnapot_page_mask=65536
#--override cpu/Svpbmt=F
#--override cpu/Svnapot_page_mask=65536
# SV39 and SV48 supported
--override cpu/Sv_modes=768
# SV32 supported
--override cpu/Sv_modes=3
#--showoverrides
--override cpu/Svinval=T
@ -59,7 +61,7 @@
--override cpu/reset_address=0x80000000
--override cpu/unaligned=T # Zicclsm (should be true)
--override cpu/unaligned=F # Zicclsm (should be true)
--override cpu/ignore_non_leaf_DAU=1
--override cpu/wfi_is_nop=T
--override cpu/misa_Extensions_mask=0x0 # MISA not writable
@ -74,7 +76,7 @@
--override cpu/PMP_undefined=T
# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
--override cpu/mstatus_fs_mode=rvfs_write_nz
--override cpu/mstatus_fs_mode=write_1
# PMA Settings
# 'r': read access allowed

117
config/rv64gc/imperas.ic Normal file
View File

@ -0,0 +1,117 @@
# imperas.ic
# Initialization file for ImperasDV lock step simulation
# David_Harris@hmc.edu 15 August 2024
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
#--mpdconsole
#--gdbconsole
#--showoverrides
#--showcommands
# Core settings
--override cpu/priv_version=1.12
--override cpu/user_version=20191213
# arch
--override cpu/mimpid=0x100
--override cpu/mvendorid=0x602
--override cpu/marchid=0x24
--override refRoot/cpu/tvec_align=64
--override refRoot/cpu/envcfg_mask=1 # dh 1/26/24 this should be deleted when ImperasDV is updated to allow envcfg.FIOM to be written
# bit manipulation
--override cpu/add_Extensions=B
--override cpu/bitmanip_version=1.0.0
--override cpu/misa_B_Zba_Zbb_Zbs=T
# More extensions
--override cpu/Zcb=T
--override cpu/Zicond=T
--override cpu/Zfh=T
--override cpu/Zfa=T
# Cache block operations
--override cpu/Zicbom=T
--override cpu/Zicbop=T
--override cpu/Zicboz=T
--override cmomp_bytes=64 # Zic64b
--override cmoz_bytes=64 # Zic64b
--override lr_sc_grain=8 # Za64rs requires <=64; we use native word size
# 64 KiB continuous huge pages supported
--override cpu/Svpbmt=T
--override cpu/Svnapot_page_mask=65536
# SV39 and SV48 supported
--override cpu/Sv_modes=768
--override cpu/Svinval=T
# clarify
#--override refRoot/cpu/mtvec_sext=F
--override cpu/tval_ii_code=T
#--override cpu/time_undefined=T
#--override cpu/cycle_undefined=T
#--override cpu/instret_undefined=T
#--override cpu/hpmcounter_undefined=T
--override cpu/reset_address=0x80000000
--override cpu/unaligned=T # Zicclsm (should be true)
--override cpu/ignore_non_leaf_DAU=1
--override cpu/wfi_is_nop=T
--override cpu/misa_Extensions_mask=0x0 # MISA not writable
--override cpu/Sstc=T
# Enable SVADU hardware update of A/D bits when menvcfg.ADUE=1
--override cpu/Svadu=T
#--override cpu/updatePTEA=F
#--override cpu/updatePTED=F
--override cpu/PMP_registers=16
--override cpu/PMP_undefined=T
# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
--override cpu/mstatus_fs_mode=write_1
# PMA Settings
# 'r': read access allowed
# 'w': write access allowed
# 'x': execute access allowed
# 'a': aligned access required
# 'A': atomic instructions NOT allowed (actually USER1 privilege needed)
# 'P': push/pop instructions NOT allowed (actually USER2 privilege needed)
# '1': 1-byte accesses allowed
# '2': 2-byte accesses allowed
# '4': 4-byte accesses allowed
# '8': 8-byte accesses allowed
# '-', space: ignored (use for input string formatting).
#
# SVxx Memory 0x0000000000 0x7FFFFFFFFF
#
--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0xFFFFFFFFFFFFFFFFFF -attributes " ---a-- ---- " # All memory inaccessible unless defined otherwise
--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0x7FFFFFFFFF -attributes " ---a-- ---- " # INITIAL
--callcommand refRoot/cpu/setPMA -lo 0x0000001000 -hi 0x0000001FFF -attributes " r-x-A- 1248 " # BOOTROM
--callcommand refRoot/cpu/setPMA -lo 0x0000012100 -hi 0x000001211F -attributes " rw-aA- --48 " # SDC
--callcommand refRoot/cpu/setPMA -lo 0x0002000000 -hi 0x000200FFFF -attributes " rw-aA- 1248 " # CLINT
--callcommand refRoot/cpu/setPMA -lo 0x000C000000 -hi 0x000FFFFFFF -attributes " rw-aA- --4- " # PLIC
--callcommand refRoot/cpu/setPMA -lo 0x0010000000 -hi 0x0010000007 -attributes " rw-aA- 1--- " # UART0 error - 0x10000000 - 0x100000FF
--callcommand refRoot/cpu/setPMA -lo 0x0010060000 -hi 0x00100600FF -attributes " rw-aA- --4- " # GPIO error - 0x10069000 - 0x100600FF
--callcommand refRoot/cpu/setPMA -lo 0x0010040000 -hi 0x0010040FFF -attributes " rw-aA- --4- " # SPI error - 0x10040000 - 0x10040FFF
--callcommand refRoot/cpu/setPMA -lo 0x0080000000 -hi 0x008FFFFFFF -attributes " rwx--- 1248 " # UNCORE_RAM
# Enable the Imperas instruction coverage
#-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0
#-override refRoot/cpu/cv/cover=basic
#-override refRoot/cpu/cv/extensions=RV32I
# Add Imperas simulator application instruction tracing
# uncomment these to provide tracing
#--verbose --trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange # --traceafter 300000000
#--override cpu/debugflags=6 --override cpu/verbose=1
#--override cpu/show_c_prefix=T
# Store simulator output to logfile
--output imperas.log

View File

@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF));
localparam NORMSHIFTSZDRSU = DIVb+1+NF;
localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU);
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */

View File

@ -194,6 +194,8 @@ localparam cvw_t P = '{
FMALEN : FMALEN,
NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
NORMSHIFTSZDRSU : NORMSHIFTSZDRSU,
LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU,
LOGR : LOGR,
RK : RK,
FPDUR : FPDUR,

View File

@ -52,6 +52,42 @@ when 8 bytes are transferred
*/
// crc16 table to reduce byte processing time
// 256-entry lookup table, indexed by an 8-bit value, used to advance a 16-bit
// CRC by one byte per lookup:
//   crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff;
// Entry 1 is 0x1021, which is consistent with a table generated for the
// CRC-16-CCITT polynomial (NOTE(review): confirm against the generator used).
// The values must not be reordered or edited by hand.
static const uint16_t crctable[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
};
int disk_read(BYTE * buf, LBA_t sector, UINT count) {
uint64_t r;
UINT i, j;
@ -86,6 +122,7 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
for (i = 0; i < count; i++) {
uint16_t crc, crc_exp;
uint64_t n = 0;
uint64_t readCount = 0;
// Wait for data token
while((r = spi_dummy()) != SD_DATA_TOKEN);
@ -98,21 +135,45 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
/* crc = crc16(crc, x); */
/* } while (--n > 0); */
n = 512/8;
do {
// Send 8 dummy bytes (fifo should be empty)
/* n = 512/8; */
/* do { */
/* // Send 8 dummy bytes (fifo should be empty) */
/* for (j = 0; j < 8; j++) { */
/* spi_sendbyte(0xff); */
/* } */
/* // Reset counter. Process bytes AS THEY COME IN. */
/* for (j = 0; j < 8; j++) { */
/* while (!(read_reg(SPI_IP) & 2)) {} */
/* uint8_t x = spi_readbyte(); */
/* *p++ = x; */
/* // crc = crc16(crc, x); */
/* crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff; */
/* } */
/* } while(--n > 0); */
n = 512;
// Initially fill the transmit fifo
for (j = 0; j < 8; j++) {
spi_sendbyte(0xff);
}
// Reset counter. Process bytes AS THEY COME IN.
for (j = 0; j < 8; j++) {
while (n > 0) {
// Wait for bytes to be received
while (!(read_reg(SPI_IP) & 2)) {}
// Read byte
uint8_t x = spi_readbyte();
*p++ = x;
crc = crc16(crc, x);
// Send another dummy byte
if (n > 8) {
spi_sendbyte(0xff);
}
// Place received byte into memory
*p++ = x;
// Update CRC16 with fast table based method
crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff;
n = n - 1;
}
} while(--n > 0);
// Read CRC16 and check
crc_exp = ((uint16_t)spi_dummy() << 8);

View File

@ -1,3 +1,32 @@
///////////////////////////////////////////////////////////////////////
// spi.h
//
// Written: Jacob Pease jacob.pease@okstate.edu 7/22/2024
//
// Purpose: Header file for interfacing with the SPI peripheral
//
//
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the
// “License”); you may not use this file except in compliance with the
// License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work
// distributed under the License is distributed on an “AS IS” BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
///////////////////////////////////////////////////////////////////////
#pragma once
#ifndef SPI_HEADER
#define SPI_HEADER

View File

@ -149,10 +149,32 @@ if {$FunctCoverageIndex >= 0} {
set lst [lreplace $lst $FunctCoverageIndex $FunctCoverageIndex]
}\
set FunctCoverageIndex2 [lsearch -exact $lst "--fcov2"]
if {$FunctCoverageIndex2 >= 0} {
set FunctCoverage 1
set riscvISACOVsrc +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source
set FCdefineINCLUDE_TRACE2COV "+define+INCLUDE_TRACE2COV"
set FCdefineCOVER_BASE_RV64I "+define+COVER_BASE_RV64I"
set FCdefineCOVER_LEVEL_DV_PR_EXT "+define+COVER_LEVEL_DV_PR_EXT"
# Uncomment various cover statements below to control which extensions get functional coverage
set FCdefineCOVER_RV64I "+define+COVER_RV64I"
#set FCdefineCOVER_RV64M "+define+COVER_RV64M"
#set FCdefineCOVER_RV64A "+define+COVER_RV64A"
#set FCdefineCOVER_RV64F "+define+COVER_RV64F"
#set FCdefineCOVER_RV64D "+define+COVER_RV64D"
#set FCdefineCOVER_RV64ZICSR "+define+COVER_RV64ZICSR"
#set FCdefineCOVER_RV64C "+define+COVER_RV64C"
set FCdefineIDV_INCLUDE_TRACE2COV "+define+IDV_INCLUDE_TRACE2COV"
set FCTRACE2COV "+TRACE2COV_ENABLE=1"
set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1"
set lst [lreplace $lst $FunctCoverageIndex2 $FunctCoverageIndex2]
}\
set LockStepIndex [lsearch -exact $lst "--lockstep"]
# ugh. can't have more than 9 arguments passed to vsim. why? I'll have to remove --lockstep when running
# functional coverage and imply it.
if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0} {
if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0 || $FunctCoverageIndex2 >= 0} {
set lockstep 1
# ideally this would all be one or two variables, but questa is having a real hard time

View File

@ -11,6 +11,7 @@
# Must edit these based on your local environment.
export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server for Questa
export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server
export IMPERASD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Imperas license server
export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin
export DC_HOME=/cad/synopsys/SYN # Change this for your path to Synopsys Design Compiler, excluding bin
export VCS_HOME=/cad/synopsys/vcs/U-2023.03-SP2-4 # Change this for your path to Synopsys VCS, excluding bin

View File

@ -285,6 +285,8 @@ typedef struct packed {
int LOGCVTLEN;
int NORMSHIFTSZ;
int LOGNORMSHIFTSZ;
int NORMSHIFTSZDRSU;
int LOGNORMSHIFTSZDRSU;
int FMALEN;
// division constants

View File

@ -0,0 +1,9 @@
// arithrightshift: sign-preserving right shift of a signed word by P.LOGR bit positions.
module arithrightshift import cvw::*; #(parameter cvw_t P) (
  input  logic signed [P.INTDIVb+3:0] shiftin,  // signed value to be shifted
  output logic signed [P.INTDIVb+3:0] shifted   // shiftin >>> P.LOGR, same width as the input
);

  // >>> applied to a signed operand fills the vacated upper bits with the sign bit
  always_comb
    shifted = $signed(shiftin) >>> P.LOGR;

endmodule

View File

@ -0,0 +1,110 @@
///////////////////////////////////////////
// divremsqrt.sv
//
// Written: kekim@hmc.edu
// Modified:19 May 2023
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module divremsqrt import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic reset,
input logic [P.FMTBITS-1:0] FmtE,
input logic XsE,
input logic [P.NF:0] XmE, YmE,
input logic [P.NE-1:0] XeE, YeE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic FDivStartE, IDivStartE,
input logic StallM,
input logic FlushE,
input logic SqrtE, SqrtM,
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M,
input logic IntDivE, W64E,
output logic DivStickyM,
output logic FDivBusyE, IFDivStartE, FDivDoneE,
output logic [P.NE+1:0] UeM,
output logic [P.DIVb:0] UmM,
output logic [P.XLEN-1:0] FIntDivResultM,
output logic IntDivM,
// integer normalization shifter signals
// PreResultM and IntNormShiftM leave this module and PreIntResultM comes back in,
// so the shifter itself is presumably instantiated by the parent -- TODO confirm
output logic [P.INTDIVb+3:0] PreResultM,
input logic [P.XLEN-1:0] PreIntResultM,
output logic [P.DIVBLEN-1:0] IntNormShiftM
);
// Floating-point division and square root module, with optional integer division and remainder
// Computes X/Y, sqrt(X), A/B, or A%B
//
// Purely structural: this module contains no logic of its own and only wires together
// four submodules (preprocessor, FSM, iterator, postprocessor).
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
logic [P.DIVb+3:0] D; // Iterator Divisor
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
logic [P.DIVb+1:0] FirstC; // Step tracker
logic WZeroE; // Early termination flag
logic [P.DURLEN:0] CyclesE; // FSM cycles
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
logic DivStartE; // Enable signal for flops during stall
// Integer div/rem signals
logic BZeroM; // Denominator is zero
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
logic NegQuotM, ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM; // Special handling for postprocessor
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases
// NOTE(review): DivStartE, nM, mM and NegQuotM are declared above but are not connected
// to any instance in this module.
// Preprocessor: connected to the raw FP operands (XmE/YmE/XeE/YeE) and the integer
// sources (ForwardedSrcAE/BE), and to X, D, CyclesE, UeM and the M-stage integer flags.
divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
// Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.BZeroM, .AM,
.IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);
// FSM: connected to the start/stall/flush controls, the operand special-case flags,
// CyclesE and WZeroE, and to the busy/start/done handshake signals and SpecialCaseM.
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
// Int-specific
.IDivStartE, .ISpecialCaseE, .IntDivE);
// Iterator: its FirstWS/FirstWC ports are tied to the local WS/WC partial remainder nets.
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
.FirstU, .FirstUM, .FirstC, .FirstWS(WS), .FirstWC(WC));
// Postprocessor: note the instance is named fdivsqrtpostproc although the module is
// divremsqrtfdivsqrtpostproc. RemOpM is taken from bit 1 of Funct3M.
divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .SqrtM, .SpecialCaseM,
.UmM, .WZeroE, .DivStickyM,
// Int-specific
.ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
.FIntDivResultM, .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);
endmodule

View File

@ -0,0 +1,73 @@
///////////////////////////////////////////
// divshiftcalc.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Division shift calculation
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Computes the left-shift amount and the pre-positioned shifter input used to
// normalize a divide/sqrt result, and flags whether that result is subnormal.
// All outputs are combinational functions of DivUm and DivUe.
module divremsqrtdivshiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.NF+2:0] DivUm, // divsqrt significand
input logic [P.NE+1:0] DivUe, // divsqrt exponent
output logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt, // divsqrt shift amount
output logic [P.NORMSHIFTSZDRSU-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
output logic DivSubnormShiftPos // is the subnormal shift amount positive
);
logic [P.LOGNORMSHIFTSZDRSU-1:0] NormShift; // normalized result shift amount
logic [P.LOGNORMSHIFTSZDRSU-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negative)
logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount
// is the result subnormal
// the result is treated as subnormal when DivUe has its top bit set (negative) or is all zeros
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivUe+NF+1-1
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
// the shift is usable only when the sum above is non-negative (top bit clear)
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivUe+1
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
// initial left shift amount = NF
// shift one more if it is a minimally redundant radix 4 - one entire cycle needed for integer bit
// NOTE(review): the assignment below always uses NF; no radix-dependent extra shift is applied here
assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF);
// if the shift amount is negative then don't shift (keep sticky bit)
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
// NOTE(review): no such multiplication appears in this module; the comment above may be stale
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0;
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// pre-shift the divider result for normalization
// layout: NF zero bits, then DivUm (NF+3 bits), then zero padding up to NORMSHIFTSZDRSU bits
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}};
endmodule

View File

@ -0,0 +1,27 @@
// Early-termination detector for the divide/sqrt recurrence.
// Produces WZeroE from the carry-save partial remainder (WS, WC); for radix 2 it
// additionally checks the remainder with the term F added (see R2EarlyTerm below).
module divremsqrtearlyterm import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
input logic [P.DIVb+3:0] D, // Q4.DIVb
input logic [P.DIVb:0] FirstUM, // U1.DIVb
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
input logic SqrtE,
output logic WZeroE
);
logic weq0E;
// presumably weq0E is asserted when WS + WC == 0 (going by the helper's name) -- TODO confirm
aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
// generate-if on the RADIX parameter: only one of the two branches is elaborated
if (P.RADIX == 2) begin: R2EarlyTerm
logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
logic [P.DIVb+2:0] FirstK;
logic wfeq0E;
logic [P.DIVb+3:0] WCF, WSF;
// keeps each 1 bit of {1, FirstC} whose next-lower bit is 0
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
assign FZeroDivE = D << 1; // F for divide
// select the sqrt form of F when SqrtE is set, otherwise the divide form
mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
// terminate if either the remainder or the remainder plus F tests as zero
assign WZeroE = weq0E|wfeq0E;
end else begin
// other radices: only the plain remainder test is used
assign WZeroE = weq0E;
end
endmodule

View File

@ -0,0 +1,83 @@
///////////////////////////////////////////
// fdivsqrtcycles.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
// Modified: 18 April 2022
//
// Purpose: Determine number of cycles for divsqrt
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtfdivsqrtcycles
//
// Computes how many recurrence cycles the divide/sqrt FSM must run.
//   FmtE            floating-point format selector (picks the fraction width Nf)
//   SqrtE           square-root request; not read by this module
//   IntDivE         integer-divide request
//   IntResultBitsE  result bits required for an integer divide
//   CyclesE         ceil(ResultBitsE / P.RK)
module divremsqrtfdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                 SqrtE,
  input  logic                 IntDivE,
  input  logic [P.DIVBLEN-1:0] IntResultBitsE,
  output logic [P.DURLEN:0]    CyclesE
);

  logic [P.DIVBLEN-1:0] Nf;             // number of fractional (result) bits for the format
  logic [P.DIVBLEN-1:0] FPResultBitsE;  // result bits needed by a floating-point operation
  logic [P.DIVBLEN-1:0] ResultBitsE;    // result bits needed by the selected operation

  /* verilator lint_off WIDTH */

  // Fraction width of the active format
  if (P.FPSIZES == 1) begin
    assign Nf = P.NF;
  end else if (P.FPSIZES == 2) begin
    always_comb
      case (FmtE)
        1'b0: Nf = P.NF1;
        1'b1: Nf = P.NF;
      endcase
  end else if (P.FPSIZES == 3) begin
    always_comb
      case (FmtE)
        P.FMT:   Nf = P.NF;
        P.FMT1:  Nf = P.NF1;
        P.FMT2:  Nf = P.NF2;
        default: Nf = 'x;   // shouldn't happen
      endcase
  end else if (P.FPSIZES == 4) begin
    always_comb
      case (FmtE)
        P.S_FMT: Nf = P.S_NF;
        P.D_FMT: Nf = P.D_NF;
        P.H_FMT: Nf = P.H_NF;
        P.Q_FMT: Nf = P.Q_NF;
      endcase
  end

  // Cycle logic
  //   P.DIVCOPIES = k, P.LOGR = log2(R) = r, P.RK = r*k result bits per cycle.
  //   Integer division needs p fractional + r integer result bits (supplied as IntResultBitsE).
  //   Floating-point operations use Nf fraction bits + 2 guard/round bits + r bits;
  //   the same count is applied to divide and square root.
  assign FPResultBitsE = Nf + 2 + P.LOGR;

  if (P.IDIV_ON_FPU) begin
    assign ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
  end else begin
    assign ResultBitsE = FPResultBitsE;
  end

  // ceil(ResultBitsE / rk), written as floor((ResultBitsE - 1) / rk) + 1
  assign CyclesE = (ResultBitsE-1)/(P.RK) + 1;

  /* verilator lint_on WIDTH */
endmodule

View File

@ -0,0 +1,79 @@
///////////////////////////////////////////
// fdivsqrtexpcalc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Exponent calculation for divide and square root
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtfdivsqrtexpcalc
//
// Result-exponent calculation for divide and square root.
//   Fmt      format selector, picks the bias added back to the result
//   Xe, Ye   input exponents
//   Sqrt     1 selects the square-root exponent, 0 the division exponent
//   ell, m   leading-zero counts subtracted from Xe and Ye respectively
//   Ue       result exponent, two bits wider than the inputs
module divremsqrtfdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0] Fmt,
  input  logic [P.NE-1:0]      Xe, Ye,    // input exponents
  input  logic                 Sqrt,
  input  logic [P.DIVBLEN-1:0] ell, m,    // number of leading 0s in Xe and Ye
  output logic [P.NE+1:0]      Ue         // result exponent
);

  logic [P.NE-2:0] Bias;                  // bias of the selected format
  logic [P.NE+1:0] XeExt, YeExt;          // exponents zero-extended to the result width
  logic [P.NE+1:0] EllExt, MExt;          // leading-zero counts zero-extended to the result width
  logic [P.NE+1:0] SXExp;                 // Xe - ell - P.BIAS
  logic [P.NE+1:0] SXExpHalf;             // SXExp / 2, sign bit replicated
  logic [P.NE+1:0] SExp;                  // square-root exponent
  logic [P.NE+1:0] DExp;                  // division exponent

  // Determine exponent bias according to the format
  if (P.FPSIZES == 1) begin
    assign Bias = (P.NE-1)'(P.BIAS);
  end else if (P.FPSIZES == 2) begin
    assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
  end else if (P.FPSIZES == 3) begin
    always_comb
      case (Fmt)
        P.FMT:   Bias = (P.NE-1)'(P.BIAS);
        P.FMT1:  Bias = (P.NE-1)'(P.BIAS1);
        P.FMT2:  Bias = (P.NE-1)'(P.BIAS2);
        default: Bias = 'x;
      endcase
  end else if (P.FPSIZES == 4) begin
    always_comb
      case (Fmt)
        2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
        2'h1: Bias = (P.NE-1)'(P.D_BIAS);
        2'h0: Bias = (P.NE-1)'(P.S_BIAS);
        2'h2: Bias = (P.NE-1)'(P.H_BIAS);
      endcase
  end

  // Zero-extend every operand to NE+2 bits so the arithmetic below is uniform
  assign XeExt  = {2'b0, Xe};
  assign YeExt  = {2'b0, Ye};
  assign EllExt = {{(P.NE+2-P.DIVBLEN){1'b0}}, ell};
  assign MExt   = {{(P.NE+2-P.DIVBLEN){1'b0}}, m};

  // Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms.
  // P.BIAS is removed, the format's Bias is added back after halving.
  assign SXExp     = XeExt - EllExt - (P.NE+2)'(P.BIAS);
  assign SXExpHalf = {SXExp[P.NE+1], SXExp[P.NE+1:1]};
  assign SExp      = SXExpHalf + {2'b0, Bias};

  // Division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
  assign DExp = XeExt - EllExt - YeExt + MExt + {3'b0, Bias};

  // Select square root or division exponent
  assign Ue = Sqrt ? SExp : DExp;
endmodule

View File

@ -0,0 +1,116 @@
///////////////////////////////////////////
// fdivsqrtpostproc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Divide/Square root postprocessing
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtfdivsqrtpostproc
//
// Post-processing for the divide/sqrt recurrence:
//   - Execute stage: early-termination detection (WZeroE) via divremsqrtearlyterm.
//   - Memory stage:  sticky bit, selection between FirstU and FirstUM based on the
//                    sign of WS+WC, and (when P.IDIV_ON_FPU) integer quotient/remainder
//                    selection, shift, negation, special cases and W64 sign extension.
// When P.IDIV_ON_FPU is 0 this module contains no driver for FIntDivResultM.
module divremsqrtfdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallM,
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
input logic [P.DIVb+3:0] D, // Q4.DIVb
input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
input logic SqrtE,
input logic SqrtM, SpecialCaseM,
input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0)
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM,
input logic [P.DIVBLEN-1:0] IntNormShiftM,
input logic [P.XLEN-1:0] PreIntResultM,
output logic [P.DIVb:0] UmM, // U1.DIVb result significand
output logic WZeroE,
output logic DivStickyM,
output logic [P.XLEN-1:0] FIntDivResultM, // U/Q(XLEN.0)
output logic [P.INTDIVb+3:0] PreResultM
);
logic [P.DIVb+3:0] Sum;
logic [P.INTDIVb+3:0] W;
logic [P.DIVb:0] PreUmM;
logic NegStickyM;
// weq0E is declared but not used in this module
logic weq0E, WZeroM;
logic [P.XLEN-1:0] IntDivResultM;
logic NegQuotM; // Integer quotient is negative
//////////////////////////
// Execute Stage: Detect early termination for an exact result
//////////////////////////
// check for early termination on an exact result.
divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS, .WZeroE);
//////////////////////////
// E/M Pipeline register
//////////////////////////
// WZeroE is registered into WZeroM, enabled by ~StallM
flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
//////////////////////////
// Memory Stage: Postprocessing
//////////////////////////
// If the result is not exact, the sticky should be set
// (suppressed for special cases, which do not use the recurrence result)
assign DivStickyM = ~WZeroM & ~SpecialCaseM;
// Determine if sticky bit is negative *** Full sum only needed for Integer
// Sum resolves the carry-save residual; its top bit is taken as the sign
assign Sum = WC + WS;
assign NegStickyM = Sum[P.DIVb+3];
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
// For square root the selected value is shifted left by one bit
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
// Integer quotient or remainder correction, normalization, and special cases
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
// NormRemM and NormQuotM are declared but not used below
logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
// NOTE(review): PreResultM and PreIntResultM declared here have the same names as
// module ports. Within this generate block the instances below connect to these
// local signals, not to the ports - confirm that this shadowing is intended.
logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM;
logic [P.INTDIVb+3:0] DTrunc, SumTrunc;
// Keep the upper P.INTDIVb+4 bits of the resolved residual and of the divisor
assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];
// NOTE(review): arithrightshift is defined elsewhere; shift amount is not visible here
arithrightshift #(P) rshift(SumTrunc, W);
// Quotient candidate: upper bits of PreUmM, zero-extended by three bits
assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
// Integer remainder: sticky and sign correction muxes
// NegQuotM is computed here but not read elsewhere in this module
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
// When the residual is negative, add the divisor back to obtain the remainder
mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);
// Select quotient or remainder and do normalization shift
mux2 #(P.INTDIVb+4) presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
// Negate when A is negative XOR (B is negative and this is a quotient operation)
mux2 #(P.INTDIVb+4) preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM);
// Override with the fixed results for divide-by-zero and A < B
divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,PreIntResultM,IntDivResultM);
// sign extend result for W64
if (P.XLEN==64) begin
mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
{{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
W64M, FIntDivResultM);
end else
assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
end
endmodule

View File

@ -0,0 +1,250 @@
///////////////////////////////////////////
// fdivsqrtpreproc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtfdivsqrtpreproc
//
// Pre-processing for the divide/sqrt recurrence:
//   - integer operand handling (W64 adjustment, sign extraction, absolute value)
//   - selection between integer and floating-point operands
//   - leading-zero count and normalization shift of both operands
//   - integer right shift of the dividend to a digit boundary
//   - square-root operand scaling by 1/2 or 1/4 depending on exponent parity
//   - result exponent and cycle-count calculation
//   - pipeline registers for the values consumed in the M stage
//
// NOTE(review): SIGNOVERFLOWM and ZeroDiffM are declared as outputs but nothing in
// this module drives them (SIGNOVERFLOWE is tied to 0 and not registered) - confirm
// against the instantiating module.
// When P.IDIV_ON_FPU is 0 the integer pipeline outputs (IntNormShiftM, ALTBM, IntDivM,
// W64M, AsM, BsM, BZeroM, AM) have no driver here.
module divremsqrtfdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 IFDivStartE,
  input  logic [P.NF:0]        Xm, Ym,      // Floating-point significands
  input  logic [P.NE-1:0]      Xe, Ye,      // Floating-point exponents
  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                 SqrtE,
  input  logic                 XZeroE,
  input  logic [2:0]           Funct3E,
  output logic [P.NE+1:0]      UeM,         // biased exponent of result
  output logic [P.DIVb+3:0]    X, D,        // Q4.DIVb
  // Int-specific
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
  input  logic                 IntDivE, W64E,
  // Outputs
  output logic                 ISpecialCaseE,
  output logic [P.DURLEN:0]    CyclesE,
  output logic [P.DIVBLEN-1:0] IntNormShiftM,
  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM,
  output logic                 AsM, BsM, BZeroM,
  output logic [P.XLEN-1:0]    AM
);

  logic [P.DIVb:0]             Xnorm, Dnorm;
  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
  logic [P.NE+1:0]             UeE;                                 // Result Exponent (FP only)
  logic [P.DIVb:0]             IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
  logic [P.DIVBLEN-1:0]        mE, ell;                             // Leading zeros of inputs
  logic [P.DIVBLEN-1:0]        IntResultBitsE;                      // bits in integer result
  logic                        NumerZeroE;                          // Numerator is zero (X or A)
  logic                        SIGNOVERFLOWE;
  logic                        AZeroE, BZeroE;                      // A or B is Zero for integer division
  logic                        SignedDivE;                          // signed division
  logic                        AsE, BsE;                            // Signs of integer inputs
  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
  logic                        ALTBE;
  logic                        EvenExp;
  logic [$clog2(P.RK):0]       RightShiftX;
  logic [P.DIVBLEN-1:0]        ZeroDiff, p;

  //////////////////////////////////////////////////////
  // Integer Preprocessing
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
    logic [P.XLEN-1:0] BE, PosA, PosB;

    // Extract inputs, signs, zero, depending on W64 mode if applicable
    assign SignedDivE = ~Funct3E[0];

    // Source handling
    if (P.XLEN==64) begin // 64-bit, supports W64
      // In W64 mode only the low 32 bits are used, sign-extended for signed operations
      mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
      mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
    end else begin // 32 bits only
      assign AE = ForwardedSrcAE;
      assign BE = ForwardedSrcBE;
    end
    assign AZeroE = ~(|AE);
    assign BZeroE = ~(|BE);
    assign AsE = AE[P.XLEN-1] & SignedDivE;
    assign BsE = BE[P.XLEN-1] & SignedDivE;

    // Force integer inputs to be positive
    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);

    // Select integer or floating point inputs (both left-aligned and zero-padded to DIVb+1 bits)
    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
    mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
  end else begin // Int not supported
    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
    assign NumerZeroE = XZeroE;
  end

  //////////////////////////////////////////////////////
  // Integer & FP leading zero and normalization shift
  //////////////////////////////////////////////////////

  // count leading zeros for Subnorm FP and to normalize integer inputs
  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);

  // Normalization shift: shift leading one into most significant bit
  assign Xnorm = (IFX << ell);
  assign Dnorm = (IFD << mE);

  //////////////////////////////////////////////////////
  // Integer Right Shift to digit boundary
  // Determine DivXShifted (X shifted to digit boundary)
  // and nE (number of fractional digits)
  //////////////////////////////////////////////////////

  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
    // calculate number of result bits
    assign ZeroDiff = mE - ell;              // Difference in number of leading zeros
    assign ALTBE = ZeroDiff[P.DIVBLEN-1];    // A less than B (A has more leading zeros)
    assign SIGNOVERFLOWE = 1'b0;
    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);

    /* verilator lint_off WIDTH */
    assign IntResultBitsE = P.LOGR + p;      // Total number of result bits (r integer bits plus p fractional bits)
    /* verilator lint_on WIDTH */

    // Integer special cases (terminate immediately)
    assign ISpecialCaseE = BZeroE | ALTBE;

    // calculate right shift amount RightShiftX to complete in discrete number of steps
    if (P.RK > 1) begin // more than 1 bit per cycle
      // This metacomment was misspelled "lint_offf", so the WIDTH suppression paired with
      // the lint_on below never took effect.
      /* verilator lint_off WIDTH */
      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
      assign DivXShifted = DivX >> RightShiftX;                      // shift X by up to R*K-1 to complete in n steps
      /* verilator lint_on WIDTH */
    end else begin // radix 2 1 copy doesn't require shifting
      assign DivXShifted = DivX;
      assign RightShiftX = 0;
    end
  end else begin
    assign ISpecialCaseE = 0;
  end

  //////////////////////////////////////////////////////
  // Floating-Point Preprocessing
  // Extend to Q4.b format
  // shift square root to be in range [1/4, 1)
  // Normalized numbers are shifted right by 1 if the exponent is odd
  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
  //////////////////////////////////////////////////////

  // Sqrt is initialized on step one as R(X-1), so depends on Radix
  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
  // Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
  // Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
  // Radix      Exponent odd            Exponent Even
  // 2          x-2 = 2(x/2 - 1)        x/2 - 2 = 2(x/4 - 1)
  // 4          2(x)-4 = 4(x/2 - 1))    2(x/2)-4 = 4(x/4 - 1)
  // Summary: PreSqrtX = r(x/2or4 - 1)
  //
  // NOTE(review): the mux below only performs the right shift by 1 or 2 with zero fill;
  // the subtraction and multiplication by R described above are not visible in this
  // module - presumably handled downstream; confirm.

  logic [P.DIVb:0] PreSqrtX; // not used by the active code below
  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even

  /*
  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
  // This saves one bit in DIVb because there is no initial right shift.
  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
  // That is an optimization for another day.
  if (P.RADIX == 2) begin
  logic [P.DIVb:0] PreSqrtX; // U1.DIVb
  mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
  assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
  end else begin
  logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
  mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
  assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
  end
  */

  // Initialize X for division or square root
  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);

  //////////////////////////////////////////////////////
  // Select integer or floating-point operands
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin
    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
  end else begin
    assign X = PreShiftX;
  end

  // Divisor register
  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);

  // Floating-point exponent
  divremsqrtfdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);

  // Number of FSM cycles (to FSM)
  divremsqrtfdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);

  if (P.IDIV_ON_FPU) begin:intpipelineregs
    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
    logic                 RemOpE;

    /* verilator lint_off WIDTH */
    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
    /* verilator lint_on WIDTH */
    assign RemOpE = Funct3E[1];
    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);

    // pipeline registers, all loaded when IFDivStartE is asserted
    flopen #(1)         mdureg(clk, IFDivStartE, IntDivE, IntDivM);
    flopen #(1)         altbreg(clk, IFDivStartE, ALTBE, ALTBM);
    flopen #(1)         bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
    flopen #(1)         asignreg(clk, IFDivStartE, AsE, AsM);
    flopen #(1)         bsignreg(clk, IFDivStartE, BsE, BsM);
    flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
    flopen #(P.XLEN)    srcareg(clk, IFDivStartE, AE, AM);
    if (P.XLEN==64)
      flopen #(1)       w64reg(clk, IFDivStartE, W64E, W64M);
  end
endmodule

View File

@ -0,0 +1,183 @@
///////////////////////////////////////////
// flags.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Post-Processing flag calculation
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtflags
//
// Exception-flag generation for divide/sqrt post-processing.
// Produces the individual DivByZero / Overflow / Invalid flags plus the packed
// vector PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact}.
// Plus1 is part of the port list but is not read by this module.
module divremsqrtflags import cvw::*; #(parameter cvw_t P) (
  input  logic                 Xs,                   // X sign
  input  logic [P.FMTBITS-1:0] OutFmt,               // output format
  input  logic                 InfIn,                // an Inf input is being used
  input  logic                 XInf, YInf,           // inputs are infinity
  input  logic                 NaNIn,                // a NaN input is being used
  input  logic                 XSNaN, YSNaN,         // inputs are signaling NaNs
  input  logic                 XZero, YZero,         // inputs are zero
  input  logic [P.NE+1:0]      FullRe,               // Re with bits to determine sign and overflow
  input  logic [P.NE+1:0]      Me,                   // exponent of the normalized sum
  // rounding
  input  logic                 Plus1,                // add one for rounding (unused here)
  input  logic                 Round, Guard, Sticky, // bits used to determine rounding
  input  logic                 UfPlus1,              // add one for rounding for the unbounded exponent result
  // divsqrt
  input  logic                 DivOp,                // gates the divide/sqrt-specific invalid and divide-by-zero flags
  input  logic                 Sqrt,                 // operation is a square root
  // flags
  output logic                 DivByZero,            // divide by zero flag
  output logic                 Overflow,             // overflow flag to select result
  output logic                 Invalid,              // invalid flag to select the result
  output logic [4:0]           PostProcFlg           // flags
);

  logic ResExpGteMax;   // result exponent is >= the maximum exponent of the output format
  logic ExpNeg;         // sign bit of the full result exponent
  logic SpecialIn;      // Inf input, NaN input, or divide by zero
  logic AnyRoundBit;    // at least one of Round/Sticky/Guard is set
  logic TinyExp;        // exponent is negative, zero, or one with the subnormal-rounding exception
  logic SigNaN;         // an input is a signaling NaN
  logic DivInvalid;     // divide/sqrt-specific invalid condition
  logic Underflow;      // underflow flag
  logic Inexact;        // inexact flag

  ///////////////////////////////////////////////////////////////////////////////
  // Overflow
  ///////////////////////////////////////////////////////////////////////////////

  // The exponent is at or above the format maximum when its in-format bits are all
  // ones, or when any bit above the format's exponent field (below the sign) is set.
  if (P.FPSIZES == 1) begin
    assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
  end else if (P.FPSIZES == 2) begin
    assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
  end else if (P.FPSIZES == 3) begin
    always_comb
      case (OutFmt)
        P.FMT:   ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
        P.FMT1:  ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
        P.FMT2:  ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
        default: ResExpGteMax = 1'bx;
      endcase
  end else if (P.FPSIZES == 4) begin
    always_comb
      case (OutFmt)
        P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
        P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
        P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
        P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
      endcase
  end

  assign ExpNeg    = FullRe[P.NE+1];
  assign SpecialIn = InfIn | NaNIn | DivByZero;

  // Overflow: exponent at/above the maximum, not negative, and no Inf/NaN input or divide by zero
  assign Overflow = ResExpGteMax & ~ExpNeg & ~SpecialIn;

  ///////////////////////////////////////////////////////////////////////////////
  // Underflow
  ///////////////////////////////////////////////////////////////////////////////

  // Tininess is detected after rounding. The exponent qualifies when it is
  //   - negative, or
  //   - zero (subnormal result), or
  //   - one with Me == 0, unless UfPlus1 and Guard are both set.
  // The flag is raised only when the result is also inexact (any rounding bit set)
  // and no special input / invalid condition applies.
  assign AnyRoundBit = Round | Sticky | Guard;
  assign TinyExp     = ExpNeg | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1 & Guard));
  assign Underflow   = (TinyExp & AnyRoundBit) & ~(SpecialIn | Invalid);

  ///////////////////////////////////////////////////////////////////////////////
  // Inexact
  ///////////////////////////////////////////////////////////////////////////////

  // Inexact when any rounding bit is set or the result overflowed, except for
  // special inputs, invalid operations, and a zero X operand.
  assign Inexact = (AnyRoundBit | Overflow) & ~(SpecialIn | Invalid | XZero);

  ///////////////////////////////////////////////////////////////////////////////
  // Invalid
  ///////////////////////////////////////////////////////////////////////////////

  // Invalid is raised when
  //   1) either input is a signaling NaN, or
  //   2) (with DivOp) a division is Inf/Inf or 0/0, or
  //   3) (with DivOp) a square root has a negative X that is neither zero nor a NaN case
  assign SigNaN     = XSNaN | YSNaN;
  assign DivInvalid = (((XInf & YInf) | (XZero & YZero)) & ~Sqrt) | (Xs & Sqrt & ~NaNIn & ~XZero);
  assign Invalid    = SigNaN | (DivInvalid & DivOp);

  ///////////////////////////////////////////////////////////////////////////////
  // Divide by Zero
  ///////////////////////////////////////////////////////////////////////////////

  // Division by zero with a numerator that is not zero, NaN or Inf
  // (IEEE requires a finite numerator). Never raised for square root.
  assign DivByZero = YZero & DivOp & ~Sqrt & ~(XZero | NaNIn | InfIn);

  ///////////////////////////////////////////////////////////////////////////////
  // final flags
  ///////////////////////////////////////////////////////////////////////////////

  // Packed in the order invalid, divide-by-zero, overflow, underflow, inexact
  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
endmodule

View File

@ -0,0 +1,15 @@
// divremsqrtintspecialcase
//
// Final integer divide/remainder result selection.
//   BZeroM         divisor is zero   (highest priority)
//   ALTBM          dividend is smaller than divisor
//   RemOpM         1 = remainder operation, 0 = quotient operation
//   AM             dividend, returned as the remainder in both special cases
//   PreIntResultM  result computed by the datapath, used when no special case applies
//   IntDivResultM  selected XLEN-bit result
//
//   case            quotient     remainder
//   divide by zero  all ones     AM
//   A < B           0            AM
//   otherwise       PreIntResultM[XLEN-1:0]
module divremsqrtintspecialcase import cvw::*; #(parameter cvw_t P) (
  input  logic                     BZeroM,RemOpM, ALTBM,
  input  logic [P.XLEN-1:0]        AM,
  input  signed [P.INTDIVb+3:0]    PreIntResultM,
  output logic [P.XLEN-1:0]        IntDivResultM
);

  always_comb begin
    // normal case: low XLEN bits of the computed result
    IntDivResultM = PreIntResultM[P.XLEN-1:0];

    if (BZeroM) begin                     // Divide by zero
      IntDivResultM = {(P.XLEN){1'b1}};
      if (RemOpM) IntDivResultM = AM;
    end else if (ALTBM) begin             // Numerator is small
      IntDivResultM = 0;
      if (RemOpM) IntDivResultM = AM;
    end
  end
endmodule

View File

@ -0,0 +1,39 @@
///////////////////////////////////////////
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: Leading Zero Counter
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// divremsqrtlzc
//
// Leading-zero counter.
//   num      value to examine, scanned from the most significant bit down
//   ZeroCnt  number of zero bits above the first non-zero bit, truncated to
//            $clog2(WIDTH) bits (so an all-zero input of power-of-two WIDTH reads 0)
module divremsqrtlzc #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0]         num,      // number to count the leading zeroes of
  output logic [$clog2(WIDTH)-1:0] ZeroCnt   // the number of leading zeroes
);

  integer LeadingZeros;

  always_comb begin
    LeadingZeros = 0;
    // walk down from the MSB and stop at the first bit that is not a known 0
    for (int pos = WIDTH-1; pos >= 0; pos--) begin
      if (~num[pos]) LeadingZeros = LeadingZeros + 1;
      else           break;
    end
    ZeroCnt = LeadingZeros[$clog2(WIDTH)-1:0];
  end
endmodule

View File

@ -0,0 +1,81 @@
///////////////////////////////////////////
// normshift.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: normalization shifter
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// convert shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if necessary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if necessary |
// | keep |
//
// fp -> fp:
// - if result is subnormal or underflowed:
// | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is subnormal:
// | lzcIn | 0's if necessary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1
// - plus 1 to shift out the first 1
// fma shift
// | 00 | Sm | << LZA output
// .
// - two extra bits so we can correct for an LZA error of 1 or 2
// divsqrt shift
// | Nf 0's | Qm | << calculated shift amount
// .
// Normalization shifter used by the divide/sqrt postprocessor.
// Shifts ShiftIn left by ShiftAmt positions: vacated low-order bits are filled
// with zeros and bits moved past the most significant position are discarded.
module divremsqrtnormshift import cvw::*; #(parameter cvw_t P) (
  input  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt, // left-shift distance
  input  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn,  // value to be shifted
  output logic [P.NORMSHIFTSZDRSU-1:0]    Shifted   // ShiftIn << ShiftAmt
);

  // purely combinational logical left shift
  always_comb
    Shifted = ShiftIn << ShiftAmt;

endmodule

View File

@ -0,0 +1,177 @@
///////////////////////////////////////////
// postprocess.sv
//
// Written: kekim@hmc.edu
// Modified: 19 May 2023
//
// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Divide/sqrt-only floating-point postprocessor.
// Pipeline (all combinational): shift calculation -> normalization shift ->
// shift correction -> rounding -> flag generation -> special-case result select.
//
// NOTE(review): the output PreIntResultM is not driven anywhere in this module, and the
// inputs IntNormShiftM, PreResultM and IntDivM are never read. If the integer result is
// expected from this unit, the integer normalization logic still has to be added here.
module divremsqrtpostprocess import cvw::*; #(parameter cvw_t P) (
  // general signals
  input  logic                        Xs, Ys,         // input signs
  input  logic [P.NF:0]               Xm, Ym,         // input significands
  input  logic [2:0]                  Frm,            // rounding mode: 000 = round to nearest, ties to even; 001 = round towards zero; 010 = round down; 011 = round up; 100 = round to nearest, ties to max magnitude
  input  logic [P.FMTBITS-1:0]        Fmt,            // precision 1 = double 0 = single
  input  logic [3:0]                  OpCtrl,         // operation select (only bit 0, the sqrt select, is read here)
  input  logic                        XZero, YZero,   // inputs are zero
  input  logic                        XInf, YInf,     // inputs are infinity
  input  logic                        XNaN, YNaN,     // inputs are NaN
  input  logic                        XSNaN, YSNaN,   // inputs are signaling NaNs
  input  logic [1:0]                  PostProcSel,    // select result to be written to fp register
  // divide signals
  input  logic                        DivSticky,      // divider sticky bit
  input  logic [P.NE+1:0]             DivUe,          // divsqrt exponent
  input  logic [P.NF+2:0]             DivUm,          // divsqrt significand
  input  logic [P.DIVBLEN-1:0]        IntNormShiftM,  // integer normalization left-shift amount (currently unused in this module)
  input  logic [P.INTDIVb+3:0]        PreResultM,     // integer result to be shifted (currently unused in this module)
  input  logic                        IntDivM,        // integer divide select (currently unused in this module)
  // final results
  output logic [P.FLEN-1:0]           PostProcRes,    // postprocessor final result
  output logic [4:0]                  PostProcFlg,    // postprocessor flags
  output logic [P.XLEN-1:0]           PreIntResultM   // normalized integer result (NOTE(review): undriven, see module header)
);

  // general signals
  logic                               Rs;                 // result sign
  logic [P.NF-1:0]                    Rf;                 // result fraction
  logic [P.NE-1:0]                    Re;                 // result exponent
  logic                               Ms;                 // normalized sign
  logic [P.NORMSHIFTSZDRSU-1:0]       Mf;                 // normalized fraction
  logic [P.NE+1:0]                    Me;                 // normalized exponent
  logic [P.NE+1:0]                    FullRe;             // Re with bits to determine sign and overflow
  logic                               UfPlus1;            // do you add one (for determining underflow flag)
  logic [P.LOGNORMSHIFTSZDRSU-1:0]    ShiftAmt;           // normalization shift amount
  logic [P.NORMSHIFTSZDRSU-1:0]       ShiftIn;            // input to normalization shift
  logic [P.NORMSHIFTSZDRSU-1:0]       Shifted;            // output of the normalization shifter (before shift correction)
  logic                               Plus1;              // add one to the final result?
  logic                               Overflow;           // overflow flag used to select results
  logic                               Invalid;            // invalid flag used to select results
  logic                               Guard, Round, Sticky; // bits needed to determine rounding
  logic [P.FMTBITS-1:0]               OutFmt;             // output format

  // division signals
  logic [P.LOGNORMSHIFTSZDRSU-1:0]    DivShiftAmt;        // divsqrt shift amount
  logic [P.NORMSHIFTSZDRSU-1:0]       DivShiftIn;         // divsqrt shift input
  logic [P.NE+1:0]                    Ue;                 // divsqrt exponent after the correction shift
  logic                               DivByZero;          // divide by zero flag (presumably produced by the flags unit - confirm against divremsqrtflags)
  logic                               DivResSubnorm;      // is the divsqrt result subnormal
  logic                               DivSubnormShiftPos; // is the divsqrt subnormal shift amount positive (not underflowed)

  // readability signals
  logic                               Sqrt;               // is the divsqrt operation sqrt
  logic                               DivOp;              // divider operation selected by PostProcSel
  logic                               InfIn;              // are any of the inputs infinity
  logic                               NaNIn;              // are any of the inputs NaN

  // DivOp is decoded for reference only: every submodule below is tied to .DivOp(1'b1)
  // because this postprocessor handles nothing but divide/sqrt results.
  assign DivOp = (PostProcSel == 2'b01);
  assign Sqrt  = OpCtrl[0];

  // is there an input of infinity or NaN being used
  assign InfIn = XInf|YInf;
  assign NaNIn = XNaN|YNaN;

  // The output format is always the instruction format: there are no fp->fp conversions
  // in this unit. Driven unconditionally so OutFmt is also defined when P.FPSIZES == 1
  // (the previous generate left it undriven for that configuration).
  assign OutFmt = Fmt;

  ///////////////////////////////////////////////////////////////////////////////
  // Normalization
  ///////////////////////////////////////////////////////////////////////////////

  // final calculations before shifting
  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);

  assign ShiftAmt = DivShiftAmt;
  assign ShiftIn  = DivShiftIn;

  // main normalization shift
  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);

  // correct the one-bit normalization uncertainty of the divsqrt result
  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);

  ///////////////////////////////////////////////////////////////////////////////
  // Rounding
  ///////////////////////////////////////////////////////////////////////////////

  // calculate result sign used in rounding unit
  divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);

  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
                             .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);

  ///////////////////////////////////////////////////////////////////////////////
  // Sign calculation
  ///////////////////////////////////////////////////////////////////////////////

  // the result sign is the sign used for rounding
  assign Rs = Ms;

  ///////////////////////////////////////////////////////////////////////////////
  // Flags
  ///////////////////////////////////////////////////////////////////////////////

  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
                             .Xs, .OutFmt, .Sqrt,
                             .NaNIn, .Round, .DivByZero,
                             .Guard, .Sticky, .UfPlus1, .DivOp(1'b1), .FullRe, .Plus1,
                             .Me, .Invalid, .Overflow, .PostProcFlg);

  ///////////////////////////////////////////////////////////////////////////////
  // Select the result
  ///////////////////////////////////////////////////////////////////////////////

  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
                                         .Frm, .OutFmt, .XNaN, .YNaN,
                                         .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
                                         .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );

endmodule

View File

@ -0,0 +1,268 @@
///////////////////////////////////////////
// divremsqrtround.sv
//
// Written: kekim@hmc.edu, me@KatherineParry.com
// Modified: 19 May 2023
//
// Purpose: Rounder
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Rounder for the divide/sqrt postprocessor.
// Extracts the lsb/guard/round/sticky bits of the normalized fraction Mf at the position
// that matches the output format, decides from the rounding mode whether one ulp has to
// be added, and adds it to {exponent, fraction} so that a fraction carry-out increments
// the exponent.
module divremsqrtround import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0]          OutFmt,     // output format
  input  logic [2:0]                    Frm,        // rounding mode
  input  logic                          Ms,         // normalized sign
  input  logic [P.NORMSHIFTSZDRSU-1:0]  Mf,         // normalized fraction
  // divsqrt
  input  logic                          DivOp,      // is a division operation being done
  input  logic                          DivSticky,  // divsqrt sticky bit
  input  logic [P.NE+1:0]               Ue,         // the divsqrt calculated exponent
  // outputs
  output logic [P.NE+1:0]               Me,         // normalized exponent (copy of Ue)
  output logic                          UfPlus1,    // do you add one to the result if given an unbounded exponent
  output logic [P.NE+1:0]               FullRe,     // Re with bits to determine sign and overflow
  output logic [P.NE-1:0]               Re,         // result exponent
  output logic [P.NF-1:0]               Rf,         // result fraction
  output logic                          Sticky,     // sticky bit
  output logic                          Plus1,      // do you add one to the final result
  output logic                          Round, Guard // bits needed to calculate rounding
);

  logic               UfCalcPlus1;      // calculated plus one for unbounded exponent
  logic               NormSticky;       // sticky bit gathered from the normalized fraction
  logic [P.NF-1:0]    RoundFrac;        // fraction before the rounding increment
  logic               FpGuard, FpRound; // floating point round/guard bits
  logic               FpLsbRes;         // least significant bit of floating point result
  logic               LsbRes;           // lsb of result
  logic               CalcPlus1;        // calculated plus1
  logic               FpPlus1;          // do you add one to the fp result
  logic [P.FLEN:0]    RoundAdd;         // how much to add to the result

  ///////////////////////////////////////////////////////////////////////////////
  // Sticky bit
  ///////////////////////////////////////////////////////////////////////////////
  // NormSticky is the OR of every bit of Mf below the round position of the selected
  // format. The lowest slice (below the widest format's round bit) belongs to every
  // format, so it is ORed in unconditionally; the wider slices are only included for
  // the narrower formats.

  if (P.FPSIZES == 1) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 2) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 3) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 4) begin
    // The last term used to be gated by (OutFmt==P.Q_FMT), which dropped the bits below
    // the quad round position from the sticky of half/single/double results, unlike the
    // two- and three-format cases above.
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.S_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.D_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]);
  end

  // combine the divider's own sticky bit with the bits shifted below the round position
  assign Sticky = DivSticky&DivOp | NormSticky;

  ///////////////////////////////////////////////////////////////////////////////
  // Lsb, guard and round bits
  ///////////////////////////////////////////////////////////////////////////////
  // For a format with fraction width F the bits are taken from Mf at
  //   lsb   = Mf[NORMSHIFTSZDRSU-F]
  //   guard = Mf[NORMSHIFTSZDRSU-F-1]
  //   round = Mf[NORMSHIFTSZDRSU-F-2]
  // - the round bit is used to determine the underflow flag

  if (P.FPSIZES == 1) begin
    assign FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
    assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
    assign FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];

  end else if (P.FPSIZES == 2) begin
    assign FpGuard  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
    assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF]   : Mf[P.NORMSHIFTSZDRSU-P.NF1];
    assign FpRound  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];

  end else if (P.FPSIZES == 3) begin
    always_comb
      case (OutFmt)
        P.FMT: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
        end
        P.FMT1: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
        end
        P.FMT2: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
        end
        default: begin
          FpGuard  = 1'bx;
          FpLsbRes = 1'bx;
          FpRound  = 1'bx;
        end
      endcase

  end else if (P.FPSIZES == 4) begin
    always_comb
      case (OutFmt)
        2'h3: begin // uses the Q_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
        end
        2'h1: begin // uses the D_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
        end
        2'h0: begin // uses the S_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
        end
        2'h2: begin // uses the H_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
        end
      endcase
  end

  assign Guard  = FpGuard;
  assign LsbRes = FpLsbRes;
  assign Round  = FpRound;

  ///////////////////////////////////////////////////////////////////////////////
  // Rounding decision
  ///////////////////////////////////////////////////////////////////////////////

  always_comb begin
    // Determine if you add 1
    case (Frm)
      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes); // round to nearest even
      3'b001: CalcPlus1 = 0;                             // round to zero
      3'b010: CalcPlus1 = Ms;                            // round down
      3'b011: CalcPlus1 = ~Ms;                           // round up
      3'b100: CalcPlus1 = Guard;                         // round to nearest max magnitude
      default: CalcPlus1 = 1'bx;
    endcase
    // Determine if you add 1 (for underflow flag)
    // - same decision taken one bit position lower: Round acts as the guard bit and
    //   Guard acts as the lsb
    case (Frm)
      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);      // round to nearest even
      3'b001: UfCalcPlus1 = 0;                           // round to zero
      3'b010: UfCalcPlus1 = Ms;                          // round down
      3'b011: UfCalcPlus1 = ~Ms;                         // round up
      3'b100: UfCalcPlus1 = Round;                       // round to nearest max magnitude
      default: UfCalcPlus1 = 1'bx;
    endcase
  end

  // If an answer is exact don't round
  assign Plus1   = CalcPlus1 & (Sticky|Round|Guard);
  assign FpPlus1 = Plus1;
  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);

  ///////////////////////////////////////////////////////////////////////////////
  // Rounding increment
  ///////////////////////////////////////////////////////////////////////////////
  // place Plus1 at the lsb position of the selected format; RoundAdd is FLEN+1 bits
  // wide so it lines up with {Me, RoundFrac}

  if (P.FPSIZES == 1) begin
    assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};

  end else if (P.FPSIZES == 2) begin
    //                    \/FLEN+1
    //  | NE+2 |        NF      |
    //  '-NE+2-^----NF1----^
    // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
    assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};

  end else if (P.FPSIZES == 3) begin
    assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};

  end else if (P.FPSIZES == 4) begin
    assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
  end

  // trim unneeded bits from fraction
  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];

  // select the exponent
  assign Me = Ue;

  // round the result
  //      - if the fraction overflows one should be added to the exponent
  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
  assign Re = FullRe[P.NE-1:0];

endmodule

View File

@ -0,0 +1,45 @@
///////////////////////////////////////////
// divremsqrtroundsign.sv
//
// Written: kekim@hmc.edu,me@KatherineParry.com
// Modified: 19 May 2023
//
// Purpose: Sign calculation for rounding
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Sign of the divide/sqrt result as seen by the rounding unit.
// The divsqrt sign is Xs XOR Ys, with Ys masked out when Sqrt is set;
// the result is forced to 0 when DivOp is low.
module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
  input  logic Xs,    // sign of X
  input  logic Ys,    // sign of Y
  input  logic Sqrt,  // square-root operation (Ys does not contribute)
  input  logic DivOp, // divsqrt operation is active
  output logic Ms     // sign handed to the rounder
);

  // Ys only contributes when the operation is not a square root
  assign Ms = DivOp & (Xs ^ (Ys & ~Sqrt));

endmodule

View File

@ -0,0 +1,94 @@
///////////////////////////////////////////
// divremsqrtshiftcorrection.sv
//
// Written: me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: shift correction
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Post-shift correction for the divide/sqrt result.
// Produces the final normalized fraction Mf and the corrected exponent Ue from the
// output of the normalization shifter:
//   - when the result is not subnormal, the shifted value is moved left by one or two
//     more bits depending on its top bit and on whether DivUe equals 1
//   - when the result is subnormal, the shifted value is passed through unchanged
module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
  input  logic [P.NORMSHIFTSZDRSU-1:0] Shifted,            // output of the normalization shifter
  // divsqrt
  input  logic                         DivOp,              // is it a divsqrt operation (not read in this module)
  input  logic                         DivResSubnorm,      // is the divsqrt result subnormal
  input  logic [P.NE+1:0]              DivUe,              // the divsqrt result's exponent
  input  logic                         DivSubnormShiftPos, // is the subnormal shift amount positive (i.e. not underflowed)
  // output
  output logic [P.NORMSHIFTSZDRSU-1:0] Mf,                 // corrected, normalized fraction
  output logic [P.NE+1:0]              Ue                  // corrected exponent for divider
);

  logic [P.NORMSHIFTSZDRSU-1:0] ShiftedBy2, ShiftedBy1; // Shifted moved left by two / one bit(s), zero filled
  logic [P.NORMSHIFTSZDRSU-1:0] QmCorrected;            // selected candidate for a non-subnormal result
  logic                         TopBit;                 // most significant bit of Shifted
  logic                         SelShiftBy1;            // choose the one-bit candidate over the two-bit one

  assign TopBit = Shifted[P.NORMSHIFTSZDRSU-1];

  // the quotient lies in (.5, 2): the one-bit candidate is used when the top bit is
  // already set, or when the exponent is exactly one and the top bit is clear
  assign SelShiftBy1 = (TopBit | ((DivUe==1) & ~TopBit));

  assign ShiftedBy2 = {Shifted[P.NORMSHIFTSZDRSU-3:0], 2'b00};
  assign ShiftedBy1 = {Shifted[P.NORMSHIFTSZDRSU-2:0], 1'b0};

  // positional mux2 ports; presumably (d0, d1, select, out) with select=1 picking
  // ShiftedBy1 - confirm against the project's mux2 definition
  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(ShiftedBy2, ShiftedBy1, SelShiftBy1, QmCorrected);

  // a result already computed as subnormal was normalized correctly by the main shifter,
  // so it bypasses the correction
  always_comb begin
    if (~DivResSubnorm)
      Mf = QmCorrected;
    else
      Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
  end

  // exponent correction:
  //  - forced to zero for a subnormal result whose shift amount is positive
  //  - otherwise one is subtracted when the top bit of Shifted is clear (quotient < 1)
  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~TopBit};

endmodule

View File

@ -0,0 +1,240 @@
///////////////////////////////////////////
// divremsqrtspecialcase.sv
//
// Written: kekim@hmc.edu,me@KatherineParry.com
// Modified: 7/5/2022
//
// Purpose: special case selection
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
input logic Xs, // X sign
input logic [P.NF:0] Xm, Ym, // input significand's
input logic XNaN, YNaN, // are the inputs NaN
input logic [2:0] Frm, // rounding mode
input logic [P.FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // are any inputs infinity
input logic NaNIn, // are any input NaNs
input logic XInf, YInf, // are X or Y inifnity
input logic XZero, // is X zero
input logic Plus1, // do you add one for rounding
input logic Rs, // the result's sign
input logic Invalid, Overflow, // flags to choose the result
input logic [P.NE-1:0] Re, // Result exponent
input logic [P.NE+1:0] FullRe, // Result full exponent
input logic [P.NF-1:0] Rf, // Result fraction
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivByZero, // divide by zero flag
// outputs
output logic [P.FLEN-1:0] PostProcRes // final result
);
logic [P.FLEN-1:0] XNaNRes; // X is NaN result
logic [P.FLEN-1:0] YNaNRes; // Y is NaN result
logic [P.FLEN-1:0] InvalidRes; // Invalid result result
logic [P.FLEN-1:0] UfRes; // underflowed result result
logic [P.FLEN-1:0] OfRes; // overflowed result result
logic [P.FLEN-1:0] NormRes; // normal result
logic OfResMax; // does the of result output maximum norm fp number
logic KillRes; // kill the result for underflow
logic SelOfRes; // should the overflow result be selected
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
// select correct outputs for special cases
if (P.FPSIZES == 1) begin
//NaN res selection depending on standard
if(P.IEEE754) begin
assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = {Rs, Re, Rf};
end else if (P.FPSIZES == 2) begin
if(P.IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end
always_comb
if(OutFmt)
if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
else
if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
end else if (P.FPSIZES == 3) begin
always_comb
case (OutFmt)
P.FMT: begin
if(P.IEEE754) begin
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
P.FMT1: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end else begin
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
end
OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
end
P.FMT2: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
end else begin
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
end
OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
end
default: begin
if(P.IEEE754) begin
XNaNRes = (P.FLEN)'(0);
YNaNRes = (P.FLEN)'(0);
InvalidRes = (P.FLEN)'(0);
end else begin
InvalidRes = (P.FLEN)'(0);
end
OfRes = (P.FLEN)'(0);
UfRes = (P.FLEN)'(0);
NormRes = (P.FLEN)'(0);
end
endcase
end else if (P.FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(P.IEEE754) begin
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
2'h1: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
end else begin
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
end
OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
end
2'h0: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
end else begin
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
end
OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
end
2'h2: begin
if(P.IEEE754) begin
XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
end else begin
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
end
OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
// zero is exact if dividing by infinity so don't add 1
UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
end
endcase
end
// determine if you should kill the res - Cvt
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
// calculate if the overflow result should be selected
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// output infinity with result sign if divide by zero
if(P.IEEE754)
always_comb
if(XNaN) PostProcRes = XNaNRes;
else if(YNaN) PostProcRes = YNaNRes;
else if(Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
else
always_comb
if(NaNIn|Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
endmodule

102
src/fpu/divremsqrt/drsu.sv Normal file
View File

@ -0,0 +1,102 @@
///////////////////////////////////////////
// drsu.sv
//
// Written: kekim@hmc.edu
// Modified:19 May 2023
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module drsu import cvw::*;  #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 reset,
  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                 XsE, YsE,
  input  logic [P.NF:0]        XmE, YmE,
  input  logic [P.NE-1:0]      XeE, YeE,
  input  logic                 XInfE, YInfE,
  input  logic                 XZeroE, YZeroE,
  input  logic                 XNaNE, YNaNE,
  input  logic                 XSNaNE, YSNaNE,
  input  logic                 FDivStartE, IDivStartE,
  input  logic                 StallM,
  input  logic                 FlushE,
  input  logic                 SqrtE, SqrtM,
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic [2:0]           Funct3E, Funct3M,
  input  logic                 IntDivE, W64E,
  input  logic [2:0]           Frm,
  input  logic [3:0]           OpCtrl,
  input  logic [1:0]           PostProcSel,
  output logic                 FDivBusyE, IFDivStartE, FDivDoneE,
  output logic [P.FLEN-1:0]    FResM,
  output logic [P.XLEN-1:0]    FIntDivResultM,
  output logic [4:0]           FlgM
);

  // Top level of the combined divide / square-root unit:
  //   divremsqrt            -> recurrence core (drives UmM, UeM, DivStickyM and the integer pre-results)
  //   divremsqrtpostprocess -> turns those into the final FResM and FlgM
  // Computes X/Y, sqrt(X), A/B, or A%B

  // ---- Signals exchanged between the two submodules ------------------------
  logic [P.DIVb:0]        UmM;            // result significand from the core
  logic [P.NF+2:0]        UmMexact;       // U1.NF+2 : top 1+(NF+2) bits of UmM
  logic [P.NE+1:0]        UeM;            // result exponent from the core
  logic                   DivStickyM;     // sticky bit from the core
  logic                   IntDivM;        // Integer operation
  logic [P.INTDIVb+3:0]   PreResultM;
  logic [P.XLEN-1:0]      PreIntResultM;
  logic [P.DIVBLEN-1:0]   IntNormShiftM;

  // ---- Declared but not referenced anywhere in this module -----------------
  // Kept so that the set of nets visible in this scope is unchanged.
  logic [P.DIVb+3:0]      WS, WC;         // Partial remainder components
  logic [P.DIVb+3:0]      X;              // Iterator Initial Value (from dividend)
  logic [P.DIVb+3:0]      D;              // Iterator Divisor
  logic [P.DIVb:0]        FirstU, FirstUM;// Intermediate result values
  logic [P.DIVb+1:0]      FirstC;         // Step tracker
  logic                   Firstun;        // Quotient selection
  logic                   WZeroE;         // Early termination flag
  logic [P.DURLEN-1:0]    CyclesE;        // FSM cycles
  logic                   SpecialCaseM;   // Divide by zero, square root of negative, etc.
  logic                   DivStartE;      // Enable signal for flops during stall
  logic                   BZeroM;         // Denominator is zero
  logic [P.DIVBLEN:0]     nM, mM;         // Shift amounts
  logic                   NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
  logic [P.XLEN-1:0]      AM;             // Original Numerator for postprocessor
  logic                   ISpecialCaseE;  // Integer div/remainder special cases

  // ---- Recurrence core ------------------------------------------------------
  divremsqrt #(P) divremsqrt(
    .clk(clk),
    .reset(reset),
    .XsE(XsE),
    .FmtE(FmtE),
    .XmE(XmE),
    .YmE(YmE),
    .XeE(XeE),
    .YeE(YeE),
    .SqrtE(SqrtE),
    .SqrtM(SqrtM),
    .XInfE(XInfE),
    .YInfE(YInfE),
    .XZeroE(XZeroE),
    .YZeroE(YZeroE),
    .XNaNE(XNaNE),
    .YNaNE(YNaNE),
    .FDivStartE(FDivStartE),
    .IDivStartE(IDivStartE),
    .W64E(W64E),
    .StallM(StallM),
    .DivStickyM(DivStickyM),
    .FDivBusyE(FDivBusyE),
    .UeM(UeM),
    .UmM(UmM),
    .FlushE(FlushE),
    .ForwardedSrcAE(ForwardedSrcAE),
    .ForwardedSrcBE(ForwardedSrcBE),
    .Funct3M(Funct3M),
    .Funct3E(Funct3E),
    .IntDivE(IntDivE),
    .FIntDivResultM(FIntDivResultM),
    .IntDivM(IntDivM),
    .FDivDoneE(FDivDoneE),
    .IFDivStartE(IFDivStartE),
    .IntNormShiftM(IntNormShiftM),
    .PreIntResultM(PreIntResultM),
    .PreResultM(PreResultM)
  );

  // Keep only the NF+3 most-significant bits of UmM, i.e. bits [DIVb : DIVb-(NF+2)]
  assign UmMexact = UmM[P.DIVb -: P.NF+3];

  // ---- Postprocessing -------------------------------------------------------
  // Note: the operand-side inputs (sign, significand, special-value flags, format)
  // are taken from the E-suffixed ports of this module.
  divremsqrtpostprocess #(P) divremsqrtpostprocess(
    .Xs(XsE),
    .Ys(YsE),
    .Xm(XmE),
    .Ym(YmE),
    .Frm(Frm),
    .Fmt(FmtE),
    .OpCtrl(OpCtrl),
    .IntDivM(IntDivM),
    .XZero(XZeroE),
    .YZero(YZeroE),
    .XInf(XInfE),
    .YInf(YInfE),
    .XNaN(XNaNE),
    .YNaN(YNaNE),
    .XSNaN(XSNaNE),
    .YSNaN(YSNaNE),
    .PostProcSel(PostProcSel),
    .DivSticky(DivStickyM),
    .DivUe(UeM),
    .DivUm(UmMexact),
    .PostProcRes(FResM),
    .PostProcFlg(FlgM),
    .PreIntResultM(PreIntResultM),
    .PreResultM(PreResultM),
    .IntNormShiftM(IntNormShiftM)
  );

endmodule

View File

@ -0,0 +1,37 @@
///////////////////////////////////////////
// fdivsqrtpostproc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Divide/Square root postprocessing
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module intrightshift import cvw::*;  #(parameter cvw_t P) (
  input  logic signed [P.INTDIVb+3:0] shiftin,   // value to be shifted
  input  logic        [P.DIVBLEN-1:0] shiftamt,  // number of bit positions to shift right
  output logic signed [P.INTDIVb+3:0] shifted    // shiftin moved right by shiftamt
);

  // Right shifter used for integer results.
  // `>>` is a logical shift in SystemVerilog: the vacated upper bits are filled
  // with zeros even though shiftin is declared signed.
  // NOTE(review): if sign extension was intended here, `>>>` would be required - confirm.
  always_comb
    shifted = shiftin >> shiftamt;

endmodule

View File

@ -169,12 +169,17 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi);
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW;
CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[P.XLEN-1:0];
// user CSRs
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW;
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
if (P.XLEN == 32) begin
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrsr.MSTATUSH_REGW;
CSRArray[12'h31A] = testbench.dut.core.priv.priv.csr.csrm.MENVCFGH_REGW;
CSRArray[12'h15D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[63:32];
end
end else begin // hold the old value if the pipeline is stalled.
// PMP CFG 3A0 to 3AF

1682
testbench/testbench-fp.sv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -762,7 +762,7 @@ end
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org"));
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv"));
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GCK"));
void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 56));
void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, XLEN==64 ? 56 : 34));
void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6));
if(elffilename == "buildroot") filename = "";
@ -824,15 +824,25 @@ end
void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET
void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET
void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME
if (P.XLEN == 32) begin
void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH
void'(rvviRefCsrSetVolatile(0, 32'hB80)); // MCYCLEH
void'(rvviRefCsrSetVolatile(0, 32'hC82)); // INSTRETH
void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH
void'(rvviRefCsrSetVolatile(0, 32'hC81)); // TIMEH
end
// User HPMCOUNTER3 - HPMCOUNTER31
for (iter='hC03; iter<='hC1F; iter++) begin
void'(rvviRefCsrSetVolatile(0, iter)); // HPMCOUNTERx
if (P.XLEN == 32)
void'(rvviRefCsrSetVolatile(0, iter+128)); // HPMCOUNTERxH
end
// Machine MHPMCOUNTER3 - MHPMCOUNTER31
for (iter='hB03; iter<='hB1F; iter++) begin
void'(rvviRefCsrSetVolatile(0, iter)); // MHPMCOUNTERx
if (P.XLEN == 32)
void'(rvviRefCsrSetVolatile(0, iter+128)); // MHPMCOUNTERxH
end
// cannot predict this register due to latency between

File diff suppressed because it is too large Load Diff

639
testbench/tests-fp.vh Normal file
View File

@ -0,0 +1,639 @@
//////////////////////////////////////////
// tests-fp.vh
//
// Written: Katherine Parry 2022
// Modified:
//
// Purpose: List of floating-point tests to apply
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021-3 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Relative path prefix of the directory holding the test-vector (.tv) files
`define PATH "../../tests/fp/vectors/"
// 4-bit operation-control codes, one macro per operation.
// The same code appears under several names (e.g. FMA_OPCTRL and DIV_OPCTRL are
// both 4'b0000, SQRT_OPCTRL and LT_OPCTRL are both 4'b0001), so a code is
// presumably only meaningful together with a unit selector - confirm in the testbench.
// add / multiply / subtract / fused multiply-add
`define ADD_OPCTRL 4'b0110
`define MUL_OPCTRL 4'b0100
`define SUB_OPCTRL 4'b0111
`define FMA_OPCTRL 4'b0000
// divide / square root
`define DIV_OPCTRL 4'b0000
`define SQRT_OPCTRL 4'b0001
// comparisons
`define LE_OPCTRL 4'b0011
`define LT_OPCTRL 4'b0001
`define EQ_OPCTRL 4'b0010
// conversions to / from integers (UI, I, UL, L suffixes)
`define TO_UI_OPCTRL 4'b0000
`define TO_I_OPCTRL 4'b0001
`define TO_UL_OPCTRL 4'b0010
`define TO_L_OPCTRL 4'b0011
`define FROM_UI_OPCTRL 4'b0100
`define FROM_I_OPCTRL 4'b0101
`define FROM_UL_OPCTRL 4'b0110
`define FROM_L_OPCTRL 4'b0111
// integer divide / remainder variants
`define INTREMU_OPCTRL 4'b1001
`define INTREM_OPCTRL 4'b1010
`define INTDIV_OPCTRL 4'b1011
`define INTDIVW_OPCTRL 4'b1100
`define INTDIVU_OPCTRL 4'b1101
`define INTREMW_OPCTRL 4'b1110
`define INTREMUW_OPCTRL 4'b1111
`define INTDIVUW_OPCTRL 4'b1000
// 3-bit rounding-mode codes; the names match the _rne/_rz/_ru/_rd/_rnm
// suffixes of the vector files listed below
`define RNE 3'b000
`define RZ 3'b001
`define RU 3'b011
`define RD 3'b010
`define RNM 3'b100
// unit selectors (plain integers)
`define FMAUNIT 2
`define DIVUNIT 1
`define CVTINTUNIT 0
`define CVTFPUNIT 4
`define CMPUNIT 3
`define DIVREMSQRTUNIT 5
`define INTDIVUNIT 6
// Integer <-> floating-point conversion vector files.
// Every array below lists, in this order: unsigned-int -> fp, signed-int -> fp,
// fp -> unsigned-int, fp -> signed-int, each in the five rounding modes
// rne, rz, ru, rd, rnm.

// f16 <-> 32-bit integers
string f16rv32cvtint[] = '{
"ui32_to_f16_rne.tv",
"ui32_to_f16_rz.tv",
"ui32_to_f16_ru.tv",
"ui32_to_f16_rd.tv",
"ui32_to_f16_rnm.tv",
"i32_to_f16_rne.tv",
"i32_to_f16_rz.tv",
"i32_to_f16_ru.tv",
"i32_to_f16_rd.tv",
"i32_to_f16_rnm.tv",
"f16_to_ui32_rne.tv",
"f16_to_ui32_rz.tv",
"f16_to_ui32_ru.tv",
"f16_to_ui32_rd.tv",
"f16_to_ui32_rnm.tv",
"f16_to_i32_rne.tv",
"f16_to_i32_rz.tv",
"f16_to_i32_ru.tv",
"f16_to_i32_rd.tv",
"f16_to_i32_rnm.tv"
};
// f16 <-> 64-bit integers
string f16rv64cvtint[] = '{
"ui64_to_f16_rne.tv",
"ui64_to_f16_rz.tv",
"ui64_to_f16_ru.tv",
"ui64_to_f16_rd.tv",
"ui64_to_f16_rnm.tv",
"i64_to_f16_rne.tv",
"i64_to_f16_rz.tv",
"i64_to_f16_ru.tv",
"i64_to_f16_rd.tv",
"i64_to_f16_rnm.tv",
"f16_to_ui64_rne.tv",
"f16_to_ui64_rz.tv",
"f16_to_ui64_ru.tv",
"f16_to_ui64_rd.tv",
"f16_to_ui64_rnm.tv",
"f16_to_i64_rne.tv",
"f16_to_i64_rz.tv",
"f16_to_i64_ru.tv",
"f16_to_i64_rd.tv",
"f16_to_i64_rnm.tv"
};
// f32 <-> 32-bit integers
string f32rv32cvtint[] = '{
"ui32_to_f32_rne.tv",
"ui32_to_f32_rz.tv",
"ui32_to_f32_ru.tv",
"ui32_to_f32_rd.tv",
"ui32_to_f32_rnm.tv",
"i32_to_f32_rne.tv",
"i32_to_f32_rz.tv",
"i32_to_f32_ru.tv",
"i32_to_f32_rd.tv",
"i32_to_f32_rnm.tv",
"f32_to_ui32_rne.tv",
"f32_to_ui32_rz.tv",
"f32_to_ui32_ru.tv",
"f32_to_ui32_rd.tv",
"f32_to_ui32_rnm.tv",
"f32_to_i32_rne.tv",
"f32_to_i32_rz.tv",
"f32_to_i32_ru.tv",
"f32_to_i32_rd.tv",
"f32_to_i32_rnm.tv"
};
// f32 <-> 64-bit integers
string f32rv64cvtint[] = '{
"ui64_to_f32_rne.tv",
"ui64_to_f32_rz.tv",
"ui64_to_f32_ru.tv",
"ui64_to_f32_rd.tv",
"ui64_to_f32_rnm.tv",
"i64_to_f32_rne.tv",
"i64_to_f32_rz.tv",
"i64_to_f32_ru.tv",
"i64_to_f32_rd.tv",
"i64_to_f32_rnm.tv",
"f32_to_ui64_rne.tv",
"f32_to_ui64_rz.tv",
"f32_to_ui64_ru.tv",
"f32_to_ui64_rd.tv",
"f32_to_ui64_rnm.tv",
"f32_to_i64_rne.tv",
"f32_to_i64_rz.tv",
"f32_to_i64_ru.tv",
"f32_to_i64_rd.tv",
"f32_to_i64_rnm.tv"
};
// f64 <-> 32-bit integers
string f64rv32cvtint[] = '{
"ui32_to_f64_rne.tv",
"ui32_to_f64_rz.tv",
"ui32_to_f64_ru.tv",
"ui32_to_f64_rd.tv",
"ui32_to_f64_rnm.tv",
"i32_to_f64_rne.tv",
"i32_to_f64_rz.tv",
"i32_to_f64_ru.tv",
"i32_to_f64_rd.tv",
"i32_to_f64_rnm.tv",
"f64_to_ui32_rne.tv",
"f64_to_ui32_rz.tv",
"f64_to_ui32_ru.tv",
"f64_to_ui32_rd.tv",
"f64_to_ui32_rnm.tv",
"f64_to_i32_rne.tv",
"f64_to_i32_rz.tv",
"f64_to_i32_ru.tv",
"f64_to_i32_rd.tv",
"f64_to_i32_rnm.tv"
};
// f64 <-> 64-bit integers
string f64rv64cvtint[] = '{
"ui64_to_f64_rne.tv",
"ui64_to_f64_rz.tv",
"ui64_to_f64_ru.tv",
"ui64_to_f64_rd.tv",
"ui64_to_f64_rnm.tv",
"i64_to_f64_rne.tv",
"i64_to_f64_rz.tv",
"i64_to_f64_ru.tv",
"i64_to_f64_rd.tv",
"i64_to_f64_rnm.tv",
"f64_to_ui64_rne.tv",
"f64_to_ui64_rz.tv",
"f64_to_ui64_ru.tv",
"f64_to_ui64_rd.tv",
"f64_to_ui64_rnm.tv",
"f64_to_i64_rne.tv",
"f64_to_i64_rz.tv",
"f64_to_i64_ru.tv",
"f64_to_i64_rd.tv",
"f64_to_i64_rnm.tv"
};
// f128 <-> 64-bit integers
string f128rv64cvtint[] = '{
"ui64_to_f128_rne.tv",
"ui64_to_f128_rz.tv",
"ui64_to_f128_ru.tv",
"ui64_to_f128_rd.tv",
"ui64_to_f128_rnm.tv",
"i64_to_f128_rne.tv",
"i64_to_f128_rz.tv",
"i64_to_f128_ru.tv",
"i64_to_f128_rd.tv",
"i64_to_f128_rnm.tv",
"f128_to_ui64_rne.tv",
"f128_to_ui64_rz.tv",
"f128_to_ui64_ru.tv",
"f128_to_ui64_rd.tv",
"f128_to_ui64_rnm.tv",
"f128_to_i64_rne.tv",
"f128_to_i64_rz.tv",
"f128_to_i64_ru.tv",
"f128_to_i64_rd.tv",
"f128_to_i64_rnm.tv"
};
// f128 <-> 32-bit integers
string f128rv32cvtint[] = '{
"ui32_to_f128_rne.tv",
"ui32_to_f128_rz.tv",
"ui32_to_f128_ru.tv",
"ui32_to_f128_rd.tv",
"ui32_to_f128_rnm.tv",
"i32_to_f128_rne.tv",
"i32_to_f128_rz.tv",
"i32_to_f128_ru.tv",
"i32_to_f128_rd.tv",
"i32_to_f128_rnm.tv",
"f128_to_ui32_rne.tv",
"f128_to_ui32_rz.tv",
"f128_to_ui32_ru.tv",
"f128_to_ui32_rd.tv",
"f128_to_ui32_rnm.tv",
"f128_to_i32_rne.tv",
"f128_to_i32_rz.tv",
"f128_to_i32_ru.tv",
"f128_to_i32_rd.tv",
"f128_to_i32_rnm.tv"
};
// Floating-point <-> floating-point conversion vector files.
// Each array lists the wider -> narrower conversion first, then the
// narrower -> wider one, each in the five rounding modes rne, rz, ru, rd, rnm.

// f32 <-> f16
string f32f16cvt[] = '{
"f32_to_f16_rne.tv",
"f32_to_f16_rz.tv",
"f32_to_f16_ru.tv",
"f32_to_f16_rd.tv",
"f32_to_f16_rnm.tv",
"f16_to_f32_rne.tv",
"f16_to_f32_rz.tv",
"f16_to_f32_ru.tv",
"f16_to_f32_rd.tv",
"f16_to_f32_rnm.tv"
};
// f64 <-> f16
string f64f16cvt[] = '{
"f64_to_f16_rne.tv",
"f64_to_f16_rz.tv",
"f64_to_f16_ru.tv",
"f64_to_f16_rd.tv",
"f64_to_f16_rnm.tv",
"f16_to_f64_rne.tv",
"f16_to_f64_rz.tv",
"f16_to_f64_ru.tv",
"f16_to_f64_rd.tv",
"f16_to_f64_rnm.tv"
};
// f128 <-> f16
string f128f16cvt[] = '{
"f128_to_f16_rne.tv",
"f128_to_f16_rz.tv",
"f128_to_f16_ru.tv",
"f128_to_f16_rd.tv",
"f128_to_f16_rnm.tv",
"f16_to_f128_rne.tv",
"f16_to_f128_rz.tv",
"f16_to_f128_ru.tv",
"f16_to_f128_rd.tv",
"f16_to_f128_rnm.tv"
};
// f64 <-> f32
string f64f32cvt[] = '{
"f64_to_f32_rne.tv",
"f64_to_f32_rz.tv",
"f64_to_f32_ru.tv",
"f64_to_f32_rd.tv",
"f64_to_f32_rnm.tv",
"f32_to_f64_rne.tv",
"f32_to_f64_rz.tv",
"f32_to_f64_ru.tv",
"f32_to_f64_rd.tv",
"f32_to_f64_rnm.tv"
};
// f128 <-> f32
string f128f32cvt[] = '{
"f128_to_f32_rne.tv",
"f128_to_f32_rz.tv",
"f128_to_f32_ru.tv",
"f128_to_f32_rd.tv",
"f128_to_f32_rnm.tv",
"f32_to_f128_rne.tv",
"f32_to_f128_rz.tv",
"f32_to_f128_ru.tv",
"f32_to_f128_rd.tv",
"f32_to_f128_rnm.tv"
};
// f128 <-> f64
string f128f64cvt[] = '{
"f128_to_f64_rne.tv",
"f128_to_f64_rz.tv",
"f128_to_f64_ru.tv",
"f128_to_f64_rd.tv",
"f128_to_f64_rnm.tv",
"f64_to_f128_rne.tv",
"f64_to_f128_rz.tv",
"f64_to_f128_ru.tv",
"f64_to_f128_rd.tv",
"f64_to_f128_rnm.tv"
};
// Arithmetic vector files: one array per (precision, operation) pair, each
// holding one file per rounding mode in the order rne, rz, ru, rd, rnm.

// ---- addition ----
string f16add[] = '{
"f16_add_rne.tv",
"f16_add_rz.tv",
"f16_add_ru.tv",
"f16_add_rd.tv",
"f16_add_rnm.tv"
};
string f32add[] = '{
"f32_add_rne.tv",
"f32_add_rz.tv",
"f32_add_ru.tv",
"f32_add_rd.tv",
"f32_add_rnm.tv"
};
string f64add[] = '{
"f64_add_rne.tv",
"f64_add_rz.tv",
"f64_add_ru.tv",
"f64_add_rd.tv",
"f64_add_rnm.tv"
};
string f128add[] = '{
"f128_add_rne.tv",
"f128_add_rz.tv",
"f128_add_ru.tv",
"f128_add_rd.tv",
"f128_add_rnm.tv"
};
// ---- subtraction ----
string f16sub[] = '{
"f16_sub_rne.tv",
"f16_sub_rz.tv",
"f16_sub_ru.tv",
"f16_sub_rd.tv",
"f16_sub_rnm.tv"
};
string f32sub[] = '{
"f32_sub_rne.tv",
"f32_sub_rz.tv",
"f32_sub_ru.tv",
"f32_sub_rd.tv",
"f32_sub_rnm.tv"
};
string f64sub[] = '{
"f64_sub_rne.tv",
"f64_sub_rz.tv",
"f64_sub_ru.tv",
"f64_sub_rd.tv",
"f64_sub_rnm.tv"
};
string f128sub[] = '{
"f128_sub_rne.tv",
"f128_sub_rz.tv",
"f128_sub_ru.tv",
"f128_sub_rd.tv",
"f128_sub_rnm.tv"
};
// ---- multiplication ----
string f16mul[] = '{
"f16_mul_rne.tv",
"f16_mul_rz.tv",
"f16_mul_ru.tv",
"f16_mul_rd.tv",
"f16_mul_rnm.tv"
};
string f32mul[] = '{
"f32_mul_rne.tv",
"f32_mul_rz.tv",
"f32_mul_ru.tv",
"f32_mul_rd.tv",
"f32_mul_rnm.tv"
};
string f64mul[] = '{
"f64_mul_rne.tv",
"f64_mul_rz.tv",
"f64_mul_ru.tv",
"f64_mul_rd.tv",
"f64_mul_rnm.tv"
};
string f128mul[] = '{
"f128_mul_rne.tv",
"f128_mul_rz.tv",
"f128_mul_ru.tv",
"f128_mul_rd.tv",
"f128_mul_rnm.tv"
};
// ---- division ----
string f16div[] = '{
"f16_div_rne.tv",
"f16_div_rz.tv",
"f16_div_ru.tv",
"f16_div_rd.tv",
"f16_div_rnm.tv"
};
string f32div[] = '{
"f32_div_rne.tv",
"f32_div_rz.tv",
"f32_div_ru.tv",
"f32_div_rd.tv",
"f32_div_rnm.tv"
};
string f64div[] = '{
"f64_div_rne.tv",
"f64_div_rz.tv",
"f64_div_ru.tv",
"f64_div_rd.tv",
"f64_div_rnm.tv"
};
string f128div[] = '{
"f128_div_rne.tv",
"f128_div_rz.tv",
"f128_div_ru.tv",
"f128_div_rd.tv",
"f128_div_rnm.tv"
};
// ---- square root ----
string f16sqrt[] = '{
"f16_sqrt_rne.tv",
"f16_sqrt_rz.tv",
"f16_sqrt_ru.tv",
"f16_sqrt_rd.tv",
"f16_sqrt_rnm.tv"
};
string f32sqrt[] = '{
"f32_sqrt_rne.tv",
"f32_sqrt_rz.tv",
"f32_sqrt_ru.tv",
"f32_sqrt_rd.tv",
"f32_sqrt_rnm.tv"
};
string f64sqrt[] = '{
"f64_sqrt_rne.tv",
"f64_sqrt_rz.tv",
"f64_sqrt_ru.tv",
"f64_sqrt_rd.tv",
"f64_sqrt_rnm.tv"
};
string f128sqrt[] = '{
"f128_sqrt_rne.tv",
"f128_sqrt_rz.tv",
"f128_sqrt_ru.tv",
"f128_sqrt_rd.tv",
"f128_sqrt_rnm.tv"
};
// Comparison vector files, one array per precision.
// Each array lists eq, then le, then lt, each with the five rounding-mode
// suffixes rne, rz, ru, rd, rnm.
string f16cmp[] = '{
"f16_eq_rne.tv",
"f16_eq_rz.tv",
"f16_eq_ru.tv",
"f16_eq_rd.tv",
"f16_eq_rnm.tv",
"f16_le_rne.tv",
"f16_le_rz.tv",
"f16_le_ru.tv",
"f16_le_rd.tv",
"f16_le_rnm.tv",
"f16_lt_rne.tv",
"f16_lt_rz.tv",
"f16_lt_ru.tv",
"f16_lt_rd.tv",
"f16_lt_rnm.tv"
};
string f32cmp[] = '{
"f32_eq_rne.tv",
"f32_eq_rz.tv",
"f32_eq_ru.tv",
"f32_eq_rd.tv",
"f32_eq_rnm.tv",
"f32_le_rne.tv",
"f32_le_rz.tv",
"f32_le_ru.tv",
"f32_le_rd.tv",
"f32_le_rnm.tv",
"f32_lt_rne.tv",
"f32_lt_rz.tv",
"f32_lt_ru.tv",
"f32_lt_rd.tv",
"f32_lt_rnm.tv"
};
string f64cmp[] = '{
"f64_eq_rne.tv",
"f64_eq_rz.tv",
"f64_eq_ru.tv",
"f64_eq_rd.tv",
"f64_eq_rnm.tv",
"f64_le_rne.tv",
"f64_le_rz.tv",
"f64_le_ru.tv",
"f64_le_rd.tv",
"f64_le_rnm.tv",
"f64_lt_rne.tv",
"f64_lt_rz.tv",
"f64_lt_ru.tv",
"f64_lt_rd.tv",
"f64_lt_rnm.tv"
};
string f128cmp[] = '{
"f128_eq_rne.tv",
"f128_eq_rz.tv",
"f128_eq_ru.tv",
"f128_eq_rd.tv",
"f128_eq_rnm.tv",
"f128_le_rne.tv",
"f128_le_rz.tv",
"f128_le_ru.tv",
"f128_le_rd.tv",
"f128_le_rnm.tv",
"f128_lt_rne.tv",
"f128_lt_rz.tv",
"f128_lt_ru.tv",
"f128_lt_rd.tv",
"f128_lt_rnm.tv"
};
// Fused multiply-add ("mulAdd") vector files, one array per precision,
// one file per rounding mode in the order rne, rz, ru, rd, rnm.
string f16fma[] = '{
"f16_mulAdd_rne.tv",
"f16_mulAdd_rz.tv",
"f16_mulAdd_ru.tv",
"f16_mulAdd_rd.tv",
"f16_mulAdd_rnm.tv"
};
string f32fma[] = '{
"f32_mulAdd_rne.tv",
"f32_mulAdd_rz.tv",
"f32_mulAdd_ru.tv",
"f32_mulAdd_rd.tv",
"f32_mulAdd_rnm.tv"
};
string f64fma[] = '{
"f64_mulAdd_rne.tv",
"f64_mulAdd_rz.tv",
"f64_mulAdd_ru.tv",
"f64_mulAdd_rd.tv",
"f64_mulAdd_rnm.tv"
};
string f128fma[] = '{
"f128_mulAdd_rne.tv",
"f128_mulAdd_rz.tv",
"f128_mulAdd_ru.tv",
"f128_mulAdd_rd.tv",
"f128_mulAdd_rnm.tv"
};
// Integer divide / remainder vector files: a single "cvw_<width>_<op>-01.tv"
// file per operation.
// 64-bit set: rem, div, remu, divu plus the "w"-suffixed variants remw, remuw, divuw, divw.
string int64rem[] = '{
"cvw_64_rem-01.tv"
};
string int64div[] = '{
"cvw_64_div-01.tv"
};
string int64remu[] = '{
"cvw_64_remu-01.tv"
};
string int64divu[] = '{
"cvw_64_divu-01.tv"
};
string int64remw[] = '{
"cvw_64_remw-01.tv"
};
string int64remuw[] = '{
"cvw_64_remuw-01.tv"
};
string int64divuw[] = '{
"cvw_64_divuw-01.tv"
};
string int64divw[] = '{
"cvw_64_divw-01.tv"
};
// 32-bit set: rem, div, remu, divu only (no "w" variants are listed).
string int32rem[] = '{
"cvw_32_rem-01.tv"
};
string int32div[] = '{
"cvw_32_div-01.tv"
};
string int32remu[] = '{
"cvw_32_remu-01.tv"
};
string int32divu[] = '{
"cvw_32_divu-01.tv"
};

View File

@ -0,0 +1,112 @@
# Build the SPI loopback test (spitest.elf) and convert it to a memfile.
#
# Flow: every C / C++ / assembly source found under SRCDIR is compiled into
# BUILDDIR, the objects are linked into TARGET, and TARGET is then dumped
# (objdump), converted with elf2hex, and copied to ../work/.

# Source-file extensions that are recognised
CEXT		:= c
CPPEXT		:= cpp
AEXT		:= s
SEXT		:= S
# find(1) -regex alternative matching any of the extensions above
SRCEXT		:= \([$(CEXT)$(AEXT)$(SEXT)]\|$(CPPEXT)\)
OBJEXT		:= o
DEPEXT		:= d
SRCDIR		:= .
BUILDDIR	:= OBJ

# SOURCES may be overridden from the command line / environment (?=)
SOURCES		?= $(shell find $(SRCDIR) -type f -regex ".*\.$(SRCEXT)" | sort)
# Map every source file to its object file, one extension at a time
OBJECTS		:= $(SOURCES:.$(CEXT)=.$(OBJEXT))
OBJECTS		:= $(OBJECTS:.$(AEXT)=.$(OBJEXT))
OBJECTS		:= $(OBJECTS:.$(SEXT)=.$(OBJEXT))
OBJECTS		:= $(OBJECTS:.$(CPPEXT)=.$(OBJEXT))
OBJECTS		:= $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(OBJECTS))

TARGETDIR	:= bin
TARGET		:= $(TARGETDIR)/spitest.elf
ROOT		:= ..
LIBRARY_DIRS	:=
LIBRARY_FILES	:=

MARCH		:=-march=rv64imfdc
MABI		:=-mabi=lp64d
LINK_FLAGS	:=$(MARCH) $(MABI) -nostartfiles
LINKER		:=$(ROOT)/linker8000-0000.x

AFLAGS =$(MARCH) $(MABI) -W
CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2
AS=riscv64-unknown-elf-as
CC=riscv64-unknown-elf-gcc
AR=riscv64-unknown-elf-ar

# Default Make
all: directories $(TARGET).memfile

# Remake
remake: clean all

# Make the Directories
directories:
	@mkdir -p $(TARGETDIR)
	@mkdir -p $(BUILDDIR)

clean:
	rm -rf $(BUILDDIR) $(TARGETDIR) *.memfile *.objdump

# These targets never produce a file of the same name; declaring them phony
# keeps them working even if such a file or directory appears in the tree.
.PHONY: all remake directories clean

# Needed for building additional library projects
ifdef LIBRARY_DIRS
LIBS+=${LIBRARY_DIRS:%=-L%} ${LIBRARY_FILES:%=-l%}
INC+=${LIBRARY_DIRS:%=-I%}

# $(MAKE) rather than a literal "make": the sub-make then uses the same make
# program and inherits the flags of the parent invocation.
${LIBRARY_DIRS}:
	$(MAKE) -C $@ -j 1

.PHONY: $(LIBRARY_DIRS) $(TARGET)
endif

# Pull in dependency info for *existing* .o files
-include $(OBJECTS:.$(OBJEXT)=.$(DEPEXT))

# Link
$(TARGET): $(OBJECTS) $(LIBRARY_DIRS)
	$(CC) $(LINK_FLAGS) -g -o $(TARGET) $(OBJECTS) ${LIBS} -T ${LINKER}

# Compile
# After compiling, the -MM output is rewritten so that the dependency file
# names the object inside BUILDDIR and also lists every prerequisite as an
# empty target.
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CEXT)
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
	@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CEXT) > $(BUILDDIR)/$*.$(DEPEXT)
	@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
	@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
	@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
	@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp

# gcc won't output dependencies for assembly files for some reason
# most asm files don't have dependencies so the echo will work for now.
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(AEXT)
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) -c -o $@ $< > $(BUILDDIR)/$*.list
	@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)

$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SEXT)
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
	@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)

# C++
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CPPEXT)
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
	@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CPPEXT) > $(BUILDDIR)/$*.$(DEPEXT)
	@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
	@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
	@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
	@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp

# convert to hex
$(TARGET).memfile: $(TARGET)
	@echo 'Making object dump file.'
	@riscv64-unknown-elf-objdump -D $< > $<.objdump
	@echo 'Making memory file'
	riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@
	extractFunctionRadix.sh $<.objdump
	mkdir -p ../work/
	cp -f $(TARGETDIR)/* ../work/

116
tests/custom/spitest/spi.h Normal file
View File

@ -0,0 +1,116 @@
///////////////////////////////////////////////////////////////////////
// spi.h
//
// Written: Jacob Pease jacob.pease@okstate.edu 7/22/2024
//
// Purpose: Header file for interfacing with the SPI peripheral
//
//
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the
// “License”); you may not use this file except in compliance with the
// License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work
// distributed under the License is distributed on an “AS IS” BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
///////////////////////////////////////////////////////////////////////
#pragma once
#ifndef SPI_HEADER
#define SPI_HEADER
#include <stdint.h>
#define SPI_BASE 0x13000 /* Base address of SPI device used for SDC */

/* Register addresses (SPI_BASE + offset).
 * Each expansion is parenthesized so that the macro behaves as a single
 * operand inside a larger expression (e.g. SPI_TXDATA * 2, or a cast applied
 * to it) instead of having the surrounding operator bind to the offset only. */
#define SPI_SCKDIV  (SPI_BASE + 0x00) /* Serial clock divisor */
#define SPI_SCKMODE (SPI_BASE + 0x04) /* Serial clock mode */
#define SPI_CSID    (SPI_BASE + 0x10) /* Chip select ID */
#define SPI_CSDEF   (SPI_BASE + 0x14) /* Chip select default */
#define SPI_CSMODE  (SPI_BASE + 0x18) /* Chip select mode */
#define SPI_DELAY0  (SPI_BASE + 0x28) /* Delay control 0 */
#define SPI_DELAY1  (SPI_BASE + 0x2c) /* Delay control 1 */
#define SPI_FMT     (SPI_BASE + 0x40) /* Frame format */
#define SPI_TXDATA  (SPI_BASE + 0x48) /* Tx FIFO data */
#define SPI_RXDATA  (SPI_BASE + 0x4c) /* Rx FIFO data */
#define SPI_TXMARK  (SPI_BASE + 0x50) /* Tx FIFO watermark */
#define SPI_RXMARK  (SPI_BASE + 0x54) /* Rx FIFO watermark */
/* Non-implemented
#define SPI_FCTRL   (SPI_BASE + 0x60) // SPI flash interface control
#define SPI_FFMT    (SPI_BASE + 0x64) // SPI flash instruction format
*/
#define SPI_IE      (SPI_BASE + 0x70) /* Interrupt Enable Register */
#define SPI_IP      (SPI_BASE + 0x74) /* Interrupt Pendings Register */
/* delay0 bits
 * CSSCK is placed unshifted (mask 0xff, i.e. bits [7:0]); SCKCS is shifted
 * left by 16 (mask 0xff << 16, i.e. bits [23:16]).
 * The value macros only cast and shift; they do not apply the mask. */
#define SIFIVE_SPI_DELAY0_CSSCK(x) ((uint32_t)(x))
#define SIFIVE_SPI_DELAY0_CSSCK_MASK 0xffU
#define SIFIVE_SPI_DELAY0_SCKCS(x) ((uint32_t)(x) << 16)
#define SIFIVE_SPI_DELAY0_SCKCS_MASK (0xffU << 16)
/* delay1 bits
 * Same layout as delay0: INTERCS unshifted (mask 0xff), INTERXFR shifted
 * left by 16 (mask 0xff << 16). */
#define SIFIVE_SPI_DELAY1_INTERCS(x) ((uint32_t)(x))
#define SIFIVE_SPI_DELAY1_INTERCS_MASK 0xffU
#define SIFIVE_SPI_DELAY1_INTERXFR(x) ((uint32_t)(x) << 16)
#define SIFIVE_SPI_DELAY1_INTERXFR_MASK (0xffU << 16)
/* csmode bits: values written to SPI_CSMODE (AUTO = 0, HOLD = 2, OFF = 3) */
#define SIFIVE_SPI_CSMODE_MODE_AUTO 0U
#define SIFIVE_SPI_CSMODE_MODE_HOLD 2U
#define SIFIVE_SPI_CSMODE_MODE_OFF 3U
/* Functions defined in spi.c.
 * The register helpers (write_reg, read_reg, spi_sendbyte, waittx, waitrx,
 * spi_readbyte) are static inline functions defined in this header, so they
 * need no separate prototypes. */

/* Send one byte and return the byte read back from the Rx FIFO. */
uint8_t spi_txrx(uint8_t byte);

/* Send a 0xff filler byte and return the byte read back. */
uint8_t spi_dummy(void);

/* Configure the SPI controller. The prototype now carries the clkin parameter
 * so that it matches the definition and the call in spi.c and lets the
 * compiler check the argument. */
void spi_init(uint32_t clkin);

/* Program the serial clock divisor from an input clock and a desired SCK rate. */
void spi_set_clock(uint32_t clkin, uint32_t clkout);
// Write a 32-bit value to the register located at address addr.
// The access goes through a volatile pointer so the store is always performed.
static inline void write_reg(uintptr_t addr, uint32_t value) {
  *(volatile uint32_t *) addr = value;
}
// Read the 32-bit register located at address addr and return its value.
// The access goes through a volatile pointer so the load is always performed.
static inline uint32_t read_reg(uintptr_t addr) {
  volatile uint32_t *reg = (volatile uint32_t *) addr;
  return *reg;
}
// Queue one byte for transmission by writing it to the Tx FIFO data register.
static inline void spi_sendbyte(uint8_t byte) {
  const uint32_t word = byte;   // zero-extended to the 32-bit register width
  write_reg(SPI_TXDATA, word);
}
// Busy-wait until bit 0 of the interrupt-pending register (SPI_IP) is set.
static inline void waittx() {
  for (;;) {
    if (read_reg(SPI_IP) & 1) {
      return;
    }
  }
}
// Busy-wait until bit 1 of the interrupt-pending register (SPI_IP) is set,
// i.e. until the receive-watermark condition is pending.
//
// The previous condition was inverted (it spun *while* the bit was set and
// returned immediately when nothing had been received). It now mirrors
// waittx() and the receive polling loop in spi.c, both of which wait for the
// pending bit to become 1.
static inline void waitrx() {
  while (!(read_reg(SPI_IP) & 2)) {}
}
// Read the Rx FIFO data register and return its low 8 bits.
static inline uint8_t spi_readbyte() {
  const uint32_t word = read_reg(SPI_RXDATA);
  return (uint8_t) word;
}
#endif

View File

@ -0,0 +1,107 @@
///////////////////////////////////////////////////////////////////////
// spi.c
//
// Written: Jacob Pease jacob.pease@okstate.edu 8/27/2024
//
// Purpose: C code to test SPI bugs
//
//
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the
// “License”); you may not use this file except in compliance with the
// License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work
// distributed under the License is distributed on an “AS IS” BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "spi.h"
// Testing SPI peripheral in loopback mode
// TODO: Need to make sure the configuration I'm using uses loopback
// mode. This can be specified in derivlists.txt
// TODO:
// Exchange one byte: queue it in the Tx FIFO, wait for the Tx-pending bit,
// then return whatever the Rx FIFO data register holds.
uint8_t spi_txrx(uint8_t byte) {
  uint8_t received;

  spi_sendbyte(byte);
  waittx();
  received = spi_readbyte();
  return received;
}
// Exchange a filler byte (0xff) and return the byte that was read back.
uint8_t spi_dummy() {
  const uint8_t filler = 0xff;
  return spi_txrx(filler);
}
// Program the serial clock divisor so that SCK = clkin / (2 * (div + 1)).
//
//   clkin  - frequency of the clock feeding the SPI controller
//   clkout - requested SCK frequency (same unit as clkin)
//
// div = clkin / (2 * clkout) - 1, with the edge cases made well defined:
//   * clkout == 0          -> slowest clock (largest divisor) instead of a
//                             division by zero
//   * clkin  <  2 * clkout -> div = 0 (fastest clock) instead of wrapping
//                             around to 0xFFFFFFFF
//   * the product 2 * clkout is formed in 64 bits so it cannot overflow
// For every input the old code handled without wrapping, and whose divisor
// fits the field, the value written is unchanged.
void spi_set_clock(uint32_t clkin, uint32_t clkout) {
  // Assumes the divisor field is 12 bits wide, as in the SiFive FU540 SPI
  // controller this code targets - TODO confirm for this implementation.
  const uint32_t max_div = 0xFFFu;
  uint32_t div;

  if (clkout == 0) {
    div = max_div;
  } else {
    uint64_t ratio = (uint64_t)clkin / (2ull * (uint64_t)clkout);
    if (ratio == 0) {
      div = 0;
    } else if (ratio - 1 > max_div) {
      div = max_div;
    } else {
      div = (uint32_t)(ratio - 1);
    }
  }

  write_reg(SPI_SCKDIV, div);
}
// Initialize Sifive FU540 based SPI Controller
//
// Writes, in this order: interrupt enable, Tx/Rx watermarks, the two delay
// registers and the clock divisor.
// NOTE: clkin is accepted but not used; the divisor written at the end is the
// fixed value 0x18.
void spi_init(uint32_t clkin) {
  // Default delay settings, assembled up front
  const uint32_t delay0 = SIFIVE_SPI_DELAY0_CSSCK(1) |
                          SIFIVE_SPI_DELAY0_SCKCS(1);
  const uint32_t delay1 = SIFIVE_SPI_DELAY1_INTERCS(1) |
                          SIFIVE_SPI_DELAY1_INTERXFR(0);

  (void) clkin;

  // Enable interrupts
  write_reg(SPI_IE, 0x3);

  // TXMARK = 1: IP's txwm field goes high when the number of entries is < 1.
  // RXMARK = 0: IP's rxwm field goes high when the number of entries is > 0.
  write_reg(SPI_TXMARK, 1);
  write_reg(SPI_RXMARK, 0);

  // Delay 0 and Delay 1 set to their defaults
  write_reg(SPI_DELAY0, delay0);
  write_reg(SPI_DELAY1, delay1);

  // Initialize the SPI controller clock to
  // div = (20MHz/(2*400kHz)) - 1 = 24 = 0x18
  write_reg(SPI_SCKDIV, 0x18);
}
// Test entry point.
// Initializes the controller, sets the clock, holds chip select, then runs
// four bursts. Each burst queues 8 bytes (0xaa, 0xab, ...) and stores the 8
// bytes read back to consecutive addresses starting at 0x8F000000.
// Chip select is returned to automatic mode at the end.
void main() {
  spi_init(100000000);

  spi_set_clock(100000000,50000000);

  volatile uint8_t *dst = (uint8_t *)(0x8F000000);

  write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_HOLD);

  for (uint64_t bursts_left = 4; bursts_left > 0; --bursts_left) {
    int i;

    // Send 8 dummy bytes (fifo should be empty)
    for (i = 0; i < 8; i++) {
      spi_sendbyte(0xaa + i);
    }

    // Process bytes AS THEY COME IN: wait for bit 1 of SPI_IP before each read.
    for (i = 0; i < 8; i++) {
      while ((read_reg(SPI_IP) & 2) == 0) {}
      *dst++ = spi_readbyte();
    }
  }

  write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_AUTO);
}

View File

@ -0,0 +1,59 @@
# Program entry point.
# Sets up gp, clears the integer registers, sets the stack pointer, calls
# main, and finally jumps to _halt.
.section .init
.global _start
.type _start, @function

_start:
    # Initialize global pointer
    # (relaxation is disabled so the auipc/addi pair below is emitted as written)
    .option push
    .option norelax
1:  auipc gp, %pcrel_hi(__global_pointer$)
    addi  gp, gp, %pcrel_lo(1b)
    .option pop

    # Clear the integer registers. x3 (gp) is skipped because it was just
    # initialized; x2 (sp) is cleared here and set to its real value below.
    li x1, 0
    li x2, 0
    li x4, 0
    li x5, 0
    li x6, 0
    li x7, 0
    li x8, 0
    li x9, 0
    li x10, 0
    li x11, 0
    li x12, 0
    li x13, 0
    li x14, 0
    li x15, 0
    li x16, 0
    li x17, 0
    li x18, 0
    li x19, 0
    li x20, 0
    li x21, 0
    li x22, 0
    li x23, 0
    li x24, 0
    li x25, 0
    li x26, 0
    li x27, 0
    li x28, 0
    li x29, 0
    li x30, 0
    li x31, 0

    # Set the stack pointer to the top of memory - 16 bytes.
    # The RISC-V calling convention requires sp to be 16-byte aligned on
    # procedure entry; the previous value 0x87FFFFF8 was only 8-byte aligned.
    li sp, 0x87FFFFF0

    jal ra, main
    jal ra, _halt
# _halt: end-of-program routine, reached from _start after main returns.
# Loads gp = 1 and a0 = 0, executes ecall, and loops back to itself if
# execution continues past the ecall.
# NOTE(review): presumably the gp/a0 values tell the test environment that the
# program finished - confirm against the testbench / trap handler.
.section .text
.global _halt
.type _halt, @function
_halt:
li gp, 1
li a0, 0
ecall
# should the ecall return, try again instead of running off the end
j _halt

View File

@ -1,5 +1,7 @@
#!/bin/sh
# create test vectors for stand alone int

# -p: succeed when IF_vectors is left over from a previous run
mkdir -p IF_vectors
./extract_testfloat_vectors.py
./extract_arch_vectors.py
# make sure the destination is an existing directory before copying into it
mkdir -p ../vectors
cp IF_vectors/* ../vectors