mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge pull request #936 from kevindkim723/divremsqrtport
adding divremsqrt paper files
This commit is contained in:
commit
3a772416df
46
README.md
46
README.md
@ -1,3 +1,49 @@
|
||||
# divremsqrt
|
||||
This branch contains the relevant hardware and test/synthesis flows for cvw's unified integer/fp divide/sqrt recurrence unit. The recurrence unit can be generated for a variety configurations, which span flavors of radix = {2,4}, floating-point precision = {float,double,quad}, integer width = {unsupported,32,64} and divider copies = {1,2,4,8}.
|
||||
|
||||
The fpu postprocessor on cvw handles inputs not only from the div/sqrt unit, but also the fma and convert units. This branch's drsu unit contains a postprocessor with logic only relevant to division/sqrt.
|
||||
|
||||
# file hiearchy
|
||||
|
||||
The RTL files for the divider can be found under `cvw/src/fpu`
|
||||
|
||||
The majority of divider modules are found in `cvw/src/fpu/divremsqrt`, which also borrows some modules from `cvw/src/fpu/fdivsqrt`
|
||||
|
||||
divremsqrt/drsu desribes the top-level unit for the divider, taking in unpacked floating point signals, including Xs, Xm Xe, Ys, Ym, Ye.
|
||||
|
||||
drsu first feeds signals to `divremsqrt/divremsqrt`, which contains the preprocessor, iteration units, fsm, and postprocessing logic. The postprocessor in `divremsqrt/divremsqrt` also contains all integer postprocessing logic. Outputs from `divremsqrt/divremsqrt` are then sent to `divremsqrt/divremsqrtpostprocess`, which handles rounding and flags.
|
||||
|
||||
# verification flow
|
||||
|
||||
drsu is verified with the risc-v arch test Berkeley SoftFloat floating point suite of test vectors for floating point square-root and division. In order to run the top-level regression script, run `regression-wally-intdiv -intdiv`
|
||||
|
||||
The top-level regression python script is found accordingly in `cvw/bin/regression-wally-intdiv`. The testbench is found in `cvw/testbench/testbench_fp`, which runs drsu against testvectors. Batches of testvectors are stored within `cvw/testbench/tests-fp.vh`, and the raw binary test vectors are read from `tests/fp/vectors`
|
||||
|
||||
Regression log files can be found in `cvw/sim/questa/logs` after running `regression-wally-intdiv -intdiv`. Files are named with `{precision}_ieee_div_{R}_{K}_{integer}_rv{XLEN}gc_{TESTNAME}.log`
|
||||
|
||||
* precision denotes the floating-point precision types supported by the divider: f, fd, fdq, fdqh
|
||||
* R denotes the radix of the divider: 2,4
|
||||
* K denotes the number of divider copies in the unit: 1,2,4,8
|
||||
* integer denotes whether integer division/remainder is supported on the divider: i
|
||||
* XLEN denotes the width of integers: 32, 64 (this only matters if integer is supported on the divider)
|
||||
* TESTNAME denotes which tests are being run:
|
||||
* fdivremsqrt: runs fdiv, fsqrt, intdiv, intrem
|
||||
* fdiv: runs fdiv
|
||||
* fsqrt: runs fsqrt
|
||||
|
||||
|
||||
|
||||
# synthesis flow
|
||||
To run synthesis results for all flavors of the recurrence unit, go to `cvw/synthDC/scripts` and run `python3 synthdrsu.py`. This will execute a python script that runs the installed version of synopsis design compiler on divider permutations for a target frequency of 5GHz and 100MHz. To then pipe area, delay and energy results to a CSV, run `./writeCSV.sh`. Results can then be viewed in `fp-synthresults_reordered.csv` in a format similar to the one presented in the paper.
|
||||
# start-up steps
|
||||
1) `git clone --recurse-submodules https://github.com/openhwgroup/cvw.git`
|
||||
2) `cd cvw`
|
||||
3) `git checkout divremsqrt`
|
||||
4) `source ./setup.sh`
|
||||
5) `make`
|
||||
6) `/sim/regression-wally -intdiv`
|
||||
|
||||
|
||||
# core-v-wally
|
||||
|
||||
Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, B, C, D, F, M, Q, and Zk* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO). Wally is written in SystemVerilog. It passes the [RISC-V Arch Tests](https://github.com/riscv-non-isa/riscv-arch-test) and boots Linux on an FPGA. Configurations range from a minimal RV32E core to a fully featured RV64GC application processor.
|
||||
|
@ -371,6 +371,7 @@ args = parser.parse_args()
|
||||
if (args.nightly):
|
||||
nightMode = "--nightly";
|
||||
sims = ["questa", "verilator", "vcs"] # exercise all simulators; can omit a sim if no license is available
|
||||
# sims = ["questa", "verilator"] # exercise all simulators; can omit a sim if no license is available
|
||||
else:
|
||||
nightMode = ""
|
||||
sims = [defaultsim]
|
||||
@ -512,10 +513,12 @@ def main():
|
||||
elif args.fcov:
|
||||
TIMEOUT_DUR = 1*60
|
||||
os.system('rm -f questa/fcov_ucdb/* questa/fcov_logs/* questa/fcov/*')
|
||||
elif args.nightly:
|
||||
elif args.buildroot:
|
||||
TIMEOUT_DUR = 60*1440 # 1 day
|
||||
elif args.testfloat:
|
||||
TIMEOUT_DUR = 30*60 # seconds
|
||||
elif args.nightly:
|
||||
TIMEOUT_DUR = 30*60 # seconds
|
||||
else:
|
||||
TIMEOUT_DUR = 10*60 # seconds
|
||||
|
||||
|
577
bin/regression-wally-intdiv
Executable file
577
bin/regression-wally-intdiv
Executable file
@ -0,0 +1,577 @@
|
||||
#!/usr/bin/python3
|
||||
##################################
|
||||
#
|
||||
# regression-wally
|
||||
# David_Harris@Hmc.edu 25 January 2021
|
||||
# Modified by Jarred Allen <jaallen@g.hmc.edu>
|
||||
#
|
||||
# Run a regression with multiple configurations in parallel and exit with
|
||||
# non-zero status code if an error happened, as well as printing human-readable
|
||||
# output.
|
||||
#
|
||||
##################################
|
||||
import sys,os,shutil
|
||||
import multiprocessing
|
||||
|
||||
|
||||
|
||||
class bcolors:
|
||||
HEADER = '\033[95m'
|
||||
OKBLUE = '\033[94m'
|
||||
OKCYAN = '\033[96m'
|
||||
OKGREEN = '\033[92m'
|
||||
WARNING = '\033[93m'
|
||||
FAIL = '\033[91m'
|
||||
ENDC = '\033[0m'
|
||||
BOLD = '\033[1m'
|
||||
UNDERLINE = '\033[4m'
|
||||
|
||||
from collections import namedtuple
|
||||
|
||||
WALLY = os.environ.get('WALLY')
|
||||
regressionDir = WALLY + '/sim'
|
||||
os.chdir(regressionDir)
|
||||
|
||||
coverage = '-coverage' in sys.argv
|
||||
fp = '-fp' in sys.argv
|
||||
nightly = '-nightly' in sys.argv
|
||||
softfloat = '-softfloat' in sys.argv
|
||||
intdiv = '-intdiv' in sys.argv
|
||||
|
||||
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
|
||||
# name: the name of this test configuration (used in printing human-readable
|
||||
# output and picking logfile names)
|
||||
# cmd: the command to run to test (should include the logfile as '{}', and
|
||||
# the command needs to write to that file)
|
||||
# grepstr: the string to grep through the log file for. The test succeeds iff
|
||||
# grep finds that string in the logfile (is used by grep, so it may
|
||||
# be any pattern grep accepts, see `man 1 grep` for more info).
|
||||
|
||||
# edit this list to add more test cases
|
||||
if (nightly):
|
||||
nightMode = "-nightly";
|
||||
configs = []
|
||||
else:
|
||||
nightMode = "";
|
||||
configs = [
|
||||
TestCase(
|
||||
name="lints",
|
||||
variant="all",
|
||||
cmd="./lint-wally " + nightMode + " | tee {}",
|
||||
grepstr="lints run with no errors or warnings"
|
||||
)
|
||||
]
|
||||
|
||||
def getBuildrootTC(boot):
|
||||
INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
|
||||
MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt.
|
||||
if boot:
|
||||
name="buildrootboot"
|
||||
BRcmd="vsim > {} -c <<!\ndo wally.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
|
||||
BRgrepstr="WallyHostname login:"
|
||||
else:
|
||||
name="buildroot"
|
||||
if (coverage):
|
||||
print( "buildroot coverage")
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0 -coverage\n!"
|
||||
else:
|
||||
print( "buildroot no coverage")
|
||||
BRcmd="vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot configOptions -GINSTR_LIMIT=" +str(INSTR_LIMIT) + " \n!"
|
||||
BRgrepstr=str(INSTR_LIMIT)+" instructions"
|
||||
return TestCase(name,variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
|
||||
|
||||
tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
|
||||
|
||||
tests64i = ["arch64i"]
|
||||
for test in tests64i:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64i",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64i "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
|
||||
tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32f_divsqrt", "arch32d_divsqrt",
|
||||
"arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zicond",
|
||||
"arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zfh", "arch32zfh_fma",
|
||||
"arch32zfh_divsqrt", "arch32zfaf", "wally32a", "wally32priv", "wally32periph",
|
||||
"arch32zbkb", "arch32zbkc", "arch32zbkx", "arch32zknd", "arch32zkne", "arch32zknh"] # "arch32zbc", "arch32zfad",
|
||||
#tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zicboz", "arch32zcb", "wally32a", "wally32priv", "wally32periph"]
|
||||
for test in tests32gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32imcimperas = ["imperas32i", "imperas32c"] # unused
|
||||
tests32imc = ["arch32i", "arch32c", "arch32m", "wally32periph"]
|
||||
for test in tests32imc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32imc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32imc "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32i = ["arch32i"]
|
||||
for test in tests32i:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32i",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32i "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
|
||||
tests32e = ["arch32e"]
|
||||
for test in tests32e:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32e",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32e "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64f_divsqrt", "arch64d_divsqrt", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "arch64zfh", "arch64zfh_divsqrt", "arch64zfh_fma", "arch64zfaf", "arch64zfad", "arch64zbkb", "arch64zbkc", "arch64zbkx", "arch64zknd", "arch64zkne", "arch64zknh",
|
||||
"arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "arch64zicond", "wally64a", "wally64periph", "wally64priv"] # add arch64zfh_fma when available; arch64zicobz, arch64zcb when working
|
||||
#tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
|
||||
# "arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "wally64a", "wally64periph", "wally64priv", "arch64zicboz", "arch64zcb"]
|
||||
if (coverage): # delete all but 64gc tests when running coverage
|
||||
configs = []
|
||||
tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c", "arch64m",
|
||||
"arch64zifencei", "arch64zicond", "arch64a", "wally64a", "wally64periph", "wally64priv",
|
||||
"arch64zba", "arch64zbb", "arch64zbc", "arch64zbs"] # add when working: "arch64zcb", "arch64zicboz"
|
||||
if (fp):
|
||||
tests64gc.append("arch64f")
|
||||
tests64gc.append("arch64d")
|
||||
tests64gc.append("arch64zfh")
|
||||
tests64gc.append("arch64f_fma")
|
||||
tests64gc.append("arch64d_fma")
|
||||
tests64gc.append("arch64zfh_fma")
|
||||
tests64gc.append("arch64f_divsqrt")
|
||||
tests64gc.append("arch64d_divsqrt")
|
||||
tests64gc.append("arch64zfh_divsqrt")
|
||||
tests64gc.append("arch64zfaf")
|
||||
tests64gc.append("arch64zfad")
|
||||
coverStr = '-coverage'
|
||||
else:
|
||||
coverStr = ''
|
||||
for test in tests64gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
# run derivative configurations if requested
|
||||
if (nightly):
|
||||
derivconfigtests = [
|
||||
["tlb2_rv32gc", ["wally32priv"]],
|
||||
["tlb16_rv32gc", ["wally32priv"]],
|
||||
["tlb2_rv64gc", ["wally64priv"]],
|
||||
["tlb16_rv64gc", ["wally64priv"]],
|
||||
["way_1_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_2_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_8_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_4_2048_512_rv32gc", ["arch32i"]],
|
||||
["way_4_4096_256_rv32gc", ["arch32i"]],
|
||||
["way_1_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_2_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_8_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_4_2048_512_rv64gc", ["arch64i"]],
|
||||
["way_4_4096_256_rv64gc", ["arch64i"]],
|
||||
["way_4_4096_1024_rv64gc", ["arch64i"]],
|
||||
|
||||
["ram_0_0_rv64gc", ["ahb64"]],
|
||||
["ram_1_0_rv64gc", ["ahb64"]],
|
||||
["ram_1_1_rv64gc", ["ahb64"]],
|
||||
["ram_2_0_rv64gc", ["ahb64"]],
|
||||
["ram_2_1_rv64gc", ["ahb64"]],
|
||||
|
||||
["noicache_rv32gc", ["ahb32"]],
|
||||
# cacheless designs will not work until DTIM supports FLEN > XLEN
|
||||
# ["nodcache_rv32gc", ["ahb32"]],
|
||||
# ["nocache_rv32gc", ["ahb32"]],
|
||||
["noicache_rv64gc", ["ahb64"]],
|
||||
["nodcache_rv64gc", ["ahb64"]],
|
||||
["nocache_rv64gc", ["ahb64"]],
|
||||
|
||||
### add misaligned tests
|
||||
|
||||
["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
|
||||
### branch predictor simulation
|
||||
|
||||
# ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# # btb
|
||||
# ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# # ras
|
||||
# ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# enable floating-point tests when lint is fixed
|
||||
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
|
||||
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
|
||||
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
|
||||
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
|
||||
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed
|
||||
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
|
||||
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
|
||||
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
|
||||
|
||||
|
||||
]
|
||||
for test in derivconfigtests:
|
||||
config = test[0];
|
||||
tests = test[1];
|
||||
if(len(test) >= 4 and test[2] == "configOptions"):
|
||||
configOptions = test[3]
|
||||
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
|
||||
else:
|
||||
configOptions = ""
|
||||
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
|
||||
for t in tests:
|
||||
tc = TestCase(
|
||||
name=t,
|
||||
variant=config,
|
||||
cmd=cmdPrefix+" "+t+" configOptions "+configOptions+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
|
||||
|
||||
|
||||
# softfloat tests
|
||||
if (softfloat):
|
||||
testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
|
||||
configs = []
|
||||
softfloatconfigs = [
|
||||
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
|
||||
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
|
||||
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
|
||||
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
|
||||
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
|
||||
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
|
||||
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
|
||||
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
|
||||
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
|
||||
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
|
||||
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
|
||||
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
|
||||
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
|
||||
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
|
||||
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
|
||||
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
|
||||
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
|
||||
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
|
||||
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
|
||||
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
|
||||
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
|
||||
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
|
||||
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
|
||||
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
|
||||
]
|
||||
for config in softfloatconfigs:
|
||||
# div test case
|
||||
divtest = TestCase(
|
||||
name="div",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,divtest)
|
||||
|
||||
# sqrt test case
|
||||
sqrttest = TestCase(
|
||||
name="sqrt",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
#configs.append(sqrttest)
|
||||
configs.insert(0,sqrttest)
|
||||
|
||||
|
||||
# skip if divider variant config
|
||||
if ("ieee" in config):
|
||||
# cvtint test case
|
||||
cvtinttest = TestCase(
|
||||
name="cvtint",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtint \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.append(cvtinttest)
|
||||
|
||||
# cvtfp test case
|
||||
# WILL fail on F_only (refer to spec)
|
||||
cvtfptest = TestCase(
|
||||
name="cvtfp",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtfp \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.append(cvtfptest)
|
||||
|
||||
# intdiv verification
|
||||
if (intdiv):
|
||||
configs = []
|
||||
testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
|
||||
# ***NOTE add to this
|
||||
|
||||
intdivconfigs = [
|
||||
"fdh_ieee_div_2_1i_rv32gc", "fdh_ieee_div_2_1i_rv64gc", "fdh_ieee_div_2_2i_rv32gc",
|
||||
"fdh_ieee_div_2_2i_rv64gc", "fdh_ieee_div_2_4i_rv32gc", "fdh_ieee_div_2_4i_rv64gc",
|
||||
"fdh_ieee_div_4_1i_rv32gc", "fdh_ieee_div_4_1i_rv64gc", "fdh_ieee_div_4_2i_rv32gc",
|
||||
"fdh_ieee_div_4_2i_rv64gc", "fdh_ieee_div_4_4i_rv32gc", "fdh_ieee_div_4_4i_rv64gc",
|
||||
"fd_ieee_div_2_1i_rv32gc", "fd_ieee_div_2_1i_rv64gc", "fd_ieee_div_2_2i_rv32gc",
|
||||
"fd_ieee_div_2_2i_rv64gc", "fd_ieee_div_2_4i_rv32gc", "fd_ieee_div_2_4i_rv64gc",
|
||||
"fd_ieee_div_4_1i_rv32gc", "fd_ieee_div_4_1i_rv64gc", "fd_ieee_div_4_2i_rv32gc",
|
||||
"fd_ieee_div_4_2i_rv64gc", "fd_ieee_div_4_4i_rv32gc", "fd_ieee_div_4_4i_rv64gc",
|
||||
"fdqh_ieee_div_2_1i_rv32gc", "fdqh_ieee_div_2_1i_rv64gc", "fdqh_ieee_div_2_2i_rv32gc",
|
||||
"fdqh_ieee_div_2_2i_rv64gc", "fdqh_ieee_div_2_4i_rv32gc", "fdqh_ieee_div_2_4i_rv64gc",
|
||||
"fdqh_ieee_div_4_1i_rv32gc", "fdqh_ieee_div_4_1i_rv64gc", "fdqh_ieee_div_4_2i_rv32gc",
|
||||
"fdqh_ieee_div_4_2i_rv64gc", "fdqh_ieee_div_4_4i_rv32gc", "fdqh_ieee_div_4_4i_rv64gc",
|
||||
"fdq_ieee_div_2_1i_rv32gc", "fdq_ieee_div_2_1i_rv64gc", "fdq_ieee_div_2_2i_rv32gc",
|
||||
"fdq_ieee_div_2_2i_rv64gc", "fdq_ieee_div_2_4i_rv32gc", "fdq_ieee_div_2_4i_rv64gc",
|
||||
"fdq_ieee_div_4_1i_rv32gc", "fdq_ieee_div_4_1i_rv64gc", "fdq_ieee_div_4_2i_rv32gc",
|
||||
"fdq_ieee_div_4_2i_rv64gc", "fdq_ieee_div_4_4i_rv32gc", "fdq_ieee_div_4_4i_rv64gc",
|
||||
"fh_ieee_div_2_1i_rv32gc", "fh_ieee_div_2_1i_rv64gc", "fh_ieee_div_2_2i_rv32gc",
|
||||
"fh_ieee_div_2_2i_rv64gc", "fh_ieee_div_2_4i_rv32gc", "fh_ieee_div_2_4i_rv64gc",
|
||||
"fh_ieee_div_4_1i_rv32gc", "fh_ieee_div_4_1i_rv64gc", "fh_ieee_div_4_2i_rv32gc",
|
||||
"fh_ieee_div_4_2i_rv64gc", "fh_ieee_div_4_4i_rv32gc", "fh_ieee_div_4_4i_rv64gc",
|
||||
"f_ieee_div_2_1i_rv32gc", "f_ieee_div_2_1i_rv64gc", "f_ieee_div_2_2i_rv32gc",
|
||||
"f_ieee_div_2_2i_rv64gc", "f_ieee_div_2_4i_rv32gc", "f_ieee_div_2_4i_rv64gc",
|
||||
"f_ieee_div_4_1i_rv32gc", "f_ieee_div_4_1i_rv64gc", "f_ieee_div_4_2i_rv32gc",
|
||||
"f_ieee_div_4_2i_rv64gc", "f_ieee_div_4_4i_rv32gc", "f_ieee_div_4_4i_rv64gc",
|
||||
"fd_ieee_div_2_8i_rv32gc",
|
||||
"fd_ieee_div_2_8i_rv64gc",
|
||||
"fdq_ieee_div_2_8i_rv64gc",
|
||||
"fdq_ieee_div_2_8i_rv32gc",
|
||||
"f_ieee_div_2_8i_rv64gc",
|
||||
"f_ieee_div_2_8i_rv32gc"
|
||||
]
|
||||
nointdivconfigs = [
|
||||
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
|
||||
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
|
||||
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
|
||||
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
|
||||
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
|
||||
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
|
||||
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
|
||||
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
|
||||
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
|
||||
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
|
||||
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
|
||||
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
|
||||
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
|
||||
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
|
||||
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
|
||||
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
|
||||
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
|
||||
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
|
||||
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
|
||||
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
|
||||
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
|
||||
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
|
||||
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
|
||||
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
|
||||
]
|
||||
for config in intdivconfigs:
|
||||
# fdivremsqrt test case
|
||||
name = "div_drsu"
|
||||
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
|
||||
fdivremsqrttestcase = TestCase(
|
||||
name=name,
|
||||
variant=config,
|
||||
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,fdivremsqrttestcase)
|
||||
for config in nointdivconfigs:
|
||||
# div,sqrt test cases for no integer flavor of divider
|
||||
|
||||
name = "div_drsu"
|
||||
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
|
||||
divtestcase = TestCase(
|
||||
name=name,
|
||||
variant=config,
|
||||
#cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div_drsu \n!",
|
||||
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,divtestcase)
|
||||
|
||||
name = "sqrt_drsu"
|
||||
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log"
|
||||
sqrttestcase = TestCase(
|
||||
name=name,
|
||||
variant=config,
|
||||
#cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt_drsu \n!",
|
||||
cmd="wsim --tb testbench_fp " + " " + config + " " + name + " > " + logname,
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,sqrttestcase)
|
||||
|
||||
import os
|
||||
from multiprocessing import Pool, TimeoutError
|
||||
|
||||
def search_log_for_text(text, logfile):
|
||||
"""Search through the given log file for text, returning True if it is found or False if it is not"""
|
||||
grepcmd = "grep -e '%s' '%s' > /dev/null" % (text, logfile)
|
||||
return os.system(grepcmd) == 0
|
||||
|
||||
def run_test_case(config):
|
||||
testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved
|
||||
"""Run the given test case, and return 0 if the test suceeds and 1 if it fails"""
|
||||
#sim_logdir = WALLY+ "/sim/" + sim + "/logs/"
|
||||
logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config.variant+"_"+config.name+".log"
|
||||
#logname = "logs/"+config.variant+"_"+config.name+".log"
|
||||
cmd = config.cmd.format(logname)
|
||||
# print(cmd)
|
||||
os.chdir(regressionDir)
|
||||
os.system(cmd)
|
||||
if search_log_for_text(config.grepstr, logname):
|
||||
print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name))
|
||||
return 0
|
||||
else:
|
||||
print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name))
|
||||
print(" Check %s" % logname)
|
||||
return 1
|
||||
|
||||
def main():
|
||||
"""Run the tests and count the failures"""
|
||||
global configs, coverage
|
||||
try:
|
||||
os.chdir(regressionDir)
|
||||
os.mkdir("logs")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree("wkdir")
|
||||
except:
|
||||
pass
|
||||
finally:
|
||||
os.mkdir("wkdir")
|
||||
|
||||
if '-makeTests' in sys.argv:
|
||||
os.chdir(regressionDir)
|
||||
os.system('./make-tests.sh | tee ./logs/make-tests.log')
|
||||
|
||||
if '-all' in sys.argv:
|
||||
TIMEOUT_DUR = 30*7200 # seconds
|
||||
configs.append(getBuildrootTC(boot=True))
|
||||
elif '-buildroot' in sys.argv:
|
||||
TIMEOUT_DUR = 30*7200 # seconds
|
||||
configs=[getBuildrootTC(boot=True)]
|
||||
elif '-coverage' in sys.argv:
|
||||
TIMEOUT_DUR = 20*60 # seconds
|
||||
# Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage.
|
||||
# Also it is slow to run.
|
||||
# configs.append(getBuildrootTC(boot=False))
|
||||
os.system('rm -f cov/*.ucdb')
|
||||
elif '-nightly' in sys.argv:
|
||||
TIMEOUT_DUR = 60*1440 # 1 day
|
||||
configs.append(getBuildrootTC(boot=False))
|
||||
elif '-softfloat' in sys.argv:
|
||||
TIMEOUT_DUR = 60*60 # seconds
|
||||
elif '-intdiv' in sys.argv:
|
||||
TIMEOUT_DUR = 60*60 # seconds
|
||||
else:
|
||||
TIMEOUT_DUR = 10*60 # seconds
|
||||
configs.append(getBuildrootTC(boot=False))
|
||||
|
||||
# Scale the number of concurrent processes to the number of test cases, but
|
||||
# max out at a limited number of concurrent processes to not overwhelm the system
|
||||
with Pool(processes=min(len(configs),multiprocessing.cpu_count())) as pool:
|
||||
num_fail = 0
|
||||
results = {}
|
||||
for config in configs:
|
||||
results[config] = pool.apply_async(run_test_case,(config,))
|
||||
for (config,result) in results.items():
|
||||
try:
|
||||
num_fail+=result.get(timeout=TIMEOUT_DUR)
|
||||
except TimeoutError:
|
||||
num_fail+=1
|
||||
print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR))
|
||||
|
||||
# Coverage report
|
||||
if coverage:
|
||||
os.system('make coverage')
|
||||
# Count the number of failures
|
||||
if num_fail:
|
||||
print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail)
|
||||
else:
|
||||
print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
|
||||
return num_fail
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(main())
|
13
bin/wsim
13
bin/wsim
@ -28,6 +28,7 @@ parser.add_argument("--tb", "-t", help="Testbench", choices=["testbench", "testb
|
||||
parser.add_argument("--gui", "-g", help="Simulate with GUI", action="store_true")
|
||||
parser.add_argument("--ccov", "-c", help="Code Coverage", action="store_true")
|
||||
parser.add_argument("--fcov", "-f", help="Functional Coverage, implies lockstep", action="store_true")
|
||||
parser.add_argument("--fcov2", "-f2", help="Functional Coverage, implies lockstep", action="store_true")
|
||||
parser.add_argument("--fcovrvvi", "-fr", help="Functional Coverage RVVI", action="store_true")
|
||||
parser.add_argument("--args", "-a", help="Optional arguments passed to simulator via $value$plusargs", default="")
|
||||
parser.add_argument("--vcd", "-v", help="Generate testbench.vcd", action="store_true")
|
||||
@ -66,7 +67,7 @@ if(args.testsuite.endswith('.elf') and args.elf == ""): # No --elf argument; che
|
||||
|
||||
|
||||
# Validate arguments
|
||||
if (args.gui or args.ccov or args.fcov or args.fcovrvvi or args.lockstep):
|
||||
if (args.gui or args.ccov or args.fcov or args.fcov2 or args.fcovrvvi or args.lockstep):
|
||||
if args.sim not in ["questa", "vcs"]:
|
||||
print("Option only supported for Questa and VCS")
|
||||
exit(1)
|
||||
@ -81,7 +82,7 @@ if (args.rvvi):
|
||||
if(int(args.locksteplog) >= 1): EnableLog = 1
|
||||
else: EnableLog = 0
|
||||
if (args.lockstep):
|
||||
prefix = "IMPERAS_TOOLS=" + WALLY + "/sim/imperas.ic"
|
||||
prefix = "IMPERAS_TOOLS=" + WALLY + "/config/"+args.config+"/imperas.ic"
|
||||
if(args.locksteplog != 0): ImperasPlusArgs = " +IDV_TRACE2LOG=" + str(EnableLog) + " +IDV_TRACE2LOG_AFTER=" + str(args.locksteplog)
|
||||
else: ImperasPlusArgs = ""
|
||||
if(args.fcov):
|
||||
@ -90,6 +91,12 @@ if (args.lockstep):
|
||||
else: EnableLog = 0
|
||||
ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr;
|
||||
suffix = ""
|
||||
if(args.fcov2):
|
||||
CovEnableStr = "1" if int(args.covlog) > 0 else "0";
|
||||
if(args.covlog >= 1): EnableLog = 1
|
||||
else: EnableLog = 0
|
||||
ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr;
|
||||
suffix = ""
|
||||
else:
|
||||
CovEnableStr = ""
|
||||
suffix = "--lockstep"
|
||||
@ -104,6 +111,8 @@ if (args.ccov):
|
||||
flags += " --ccov"
|
||||
if (args.fcov):
|
||||
flags += " --fcov"
|
||||
if (args.fcov2):
|
||||
flags += " --fcov2"
|
||||
if (args.fcovrvvi):
|
||||
flags += "--fcovrvvi"
|
||||
|
||||
|
@ -950,6 +950,9 @@ D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
deriv f_div_2_8_rv64gc f_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv f_div_4_1_rv64gc div_4_1_rv64gc
|
||||
D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
@ -982,6 +985,9 @@ D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fh_div_2_8_rv32gc fh_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_div_4_1_rv32gc div_4_1_rv32gc
|
||||
D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
@ -1012,6 +1018,9 @@ D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fh_div_2_8_rv64gc fh_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_div_4_1_rv64gc div_4_1_rv64gc
|
||||
D_SUPPORTED 0
|
||||
ZCD_SUPPORTED 0
|
||||
@ -1038,6 +1047,9 @@ ZFH_SUPPORTED 0
|
||||
deriv fd_div_2_4_rv32gc div_2_4_rv32gc
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
deriv fd_div_2_8_rv32gc fd_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_div_4_1_rv32gc div_4_1_rv32gc
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
@ -1056,6 +1068,9 @@ ZFH_SUPPORTED 0
|
||||
deriv fd_div_2_4_rv64gc div_2_4_rv64gc
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
deriv fd_div_2_8_rv64gc fd_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_div_4_1_rv64gc div_4_1_rv64gc
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
@ -1077,6 +1092,9 @@ ZFH_SUPPORTED 1
|
||||
deriv fdh_div_2_4_rv32gc div_2_4_rv32gc
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fdh_div_2_8_rv32gc fdh_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_div_4_1_rv32gc div_4_1_rv32gc
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
@ -1095,6 +1113,9 @@ ZFH_SUPPORTED 1
|
||||
deriv fdh_div_2_4_rv64gc div_2_4_rv64gc
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fdh_div_2_8_rv64gc fdh_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_div_4_1_rv64gc div_4_1_rv64gc
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
@ -1118,6 +1139,9 @@ deriv fdq_div_2_4_rv32gc div_2_4_rv32gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
deriv fdq_div_2_8_rv32gc fdq_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_div_4_1_rv32gc div_4_1_rv32gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 0
|
||||
@ -1142,6 +1166,9 @@ deriv fdq_div_2_4_rv64gc div_2_4_rv64gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 0
|
||||
|
||||
deriv fdq_div_2_8_rv64gc fdq_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_div_4_1_rv64gc div_4_1_rv64gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 0
|
||||
@ -1168,6 +1195,9 @@ deriv fdqh_div_2_4_rv32gc div_2_4_rv32gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fdqh_div_2_8_rv32gc fdqh_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_div_4_1_rv32gc div_4_1_rv32gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 1
|
||||
@ -1192,6 +1222,9 @@ deriv fdqh_div_2_4_rv64gc div_2_4_rv64gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 1
|
||||
|
||||
deriv fdqh_div_2_8_rv64gc fdqh_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_div_4_1_rv64gc div_4_1_rv64gc
|
||||
Q_SUPPORTED 1
|
||||
ZFH_SUPPORTED 1
|
||||
@ -1215,6 +1248,9 @@ IEEE754 1
|
||||
deriv f_ieee_div_2_4_rv32gc f_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv f_ieee_div_2_8_rv32gc f_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv f_ieee_div_4_1_rv32gc f_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1233,6 +1269,9 @@ IEEE754 1
|
||||
deriv f_ieee_div_2_4_rv64gc f_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv f_ieee_div_2_8_rv64gc f_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv f_ieee_div_4_1_rv64gc f_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1252,6 +1291,9 @@ IEEE754 1
|
||||
deriv fh_ieee_div_2_4_rv32gc fh_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fh_ieee_div_2_8_rv32gc fh_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_ieee_div_4_1_rv32gc fh_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1270,6 +1312,9 @@ IEEE754 1
|
||||
deriv fh_ieee_div_2_4_rv64gc fh_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fh_ieee_div_2_8_rv64gc fh_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_ieee_div_4_1_rv64gc fh_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1289,6 +1334,9 @@ IEEE754 1
|
||||
deriv fd_ieee_div_2_4_rv32gc fd_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fd_ieee_div_2_8_rv32gc fd_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_ieee_div_4_1_rv32gc fd_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1307,6 +1355,9 @@ IEEE754 1
|
||||
deriv fd_ieee_div_2_4_rv64gc fd_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fd_ieee_div_2_8_rv64gc fd_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_ieee_div_4_1_rv64gc fd_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1327,6 +1378,9 @@ IEEE754 1
|
||||
deriv fdh_ieee_div_2_4_rv32gc fdh_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdh_ieee_div_2_8_rv32gc fdh_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_ieee_div_4_1_rv32gc fdh_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1345,6 +1399,9 @@ IEEE754 1
|
||||
deriv fdh_ieee_div_2_4_rv64gc fdh_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdh_ieee_div_2_8_rv64gc fdh_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_ieee_div_4_1_rv64gc fdh_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1364,6 +1421,9 @@ IEEE754 1
|
||||
deriv fdq_ieee_div_2_4_rv32gc fdq_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdq_ieee_div_2_8_rv32gc fdq_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_ieee_div_4_1_rv32gc fdq_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1382,6 +1442,9 @@ IEEE754 1
|
||||
deriv fdq_ieee_div_2_4_rv64gc fdq_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdq_ieee_div_2_8_rv64gc fdq_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_ieee_div_4_1_rv64gc fdq_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1402,6 +1465,9 @@ IEEE754 1
|
||||
deriv fdqh_ieee_div_2_4_rv32gc fdqh_div_2_4_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdqh_ieee_div_2_8_rv32gc fdqh_ieee_div_2_4_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_ieee_div_4_1_rv32gc fdqh_div_4_1_rv32gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1420,6 +1486,9 @@ IEEE754 1
|
||||
deriv fdqh_ieee_div_2_4_rv64gc fdqh_div_2_4_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
deriv fdqh_ieee_div_2_8_rv64gc fdqh_ieee_div_2_4_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_ieee_div_4_1_rv64gc fdqh_div_4_1_rv64gc
|
||||
IEEE754 1
|
||||
|
||||
@ -1440,6 +1509,9 @@ IDIV_ON_FPU 1
|
||||
deriv f_ieee_div_2_4i_rv32gc f_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv f_ieee_div_2_8i_rv32gc f_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv f_ieee_div_4_1i_rv32gc f_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1458,6 +1530,9 @@ IDIV_ON_FPU 1
|
||||
deriv f_ieee_div_2_4i_rv64gc f_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv f_ieee_div_2_8i_rv64gc f_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv f_ieee_div_4_1i_rv64gc f_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1477,6 +1552,9 @@ IDIV_ON_FPU 1
|
||||
deriv fh_ieee_div_2_4i_rv32gc fh_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fh_ieee_div_2_8i_rv32gc fh_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_ieee_div_4_1i_rv32gc fh_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1495,6 +1573,9 @@ IDIV_ON_FPU 1
|
||||
deriv fh_ieee_div_2_4i_rv64gc fh_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fh_ieee_div_2_8i_rv64gc fh_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fh_ieee_div_4_1i_rv64gc fh_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1515,6 +1596,9 @@ IDIV_ON_FPU 1
|
||||
deriv fd_ieee_div_2_4i_rv32gc fd_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fd_ieee_div_2_8i_rv32gc fd_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_ieee_div_4_1i_rv32gc fd_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1533,6 +1617,9 @@ IDIV_ON_FPU 1
|
||||
deriv fd_ieee_div_2_4i_rv64gc fd_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fd_ieee_div_2_8i_rv64gc fd_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fd_ieee_div_4_1i_rv64gc fd_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1553,6 +1640,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdh_ieee_div_2_4i_rv32gc fdh_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdh_ieee_div_2_8i_rv32gc fdh_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_ieee_div_4_1i_rv32gc fdh_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1571,6 +1661,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdh_ieee_div_2_4i_rv64gc fdh_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdh_ieee_div_2_8i_rv64gc fdh_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdh_ieee_div_4_1i_rv64gc fdh_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1591,6 +1684,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdq_ieee_div_2_4i_rv32gc fdq_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdq_ieee_div_2_8i_rv32gc fdq_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_ieee_div_4_1i_rv32gc fdq_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1609,6 +1705,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdq_ieee_div_2_4i_rv64gc fdq_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdq_ieee_div_2_8i_rv64gc fdq_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdq_ieee_div_4_1i_rv64gc fdq_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1629,6 +1728,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdqh_ieee_div_2_4i_rv32gc fdqh_ieee_div_2_4_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdqh_ieee_div_2_8i_rv32gc fdqh_ieee_div_2_4i_rv32gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_ieee_div_4_1i_rv32gc fdqh_ieee_div_4_1_rv32gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
@ -1647,6 +1749,9 @@ IDIV_ON_FPU 1
|
||||
deriv fdqh_ieee_div_2_4i_rv64gc fdqh_ieee_div_2_4_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
deriv fdqh_ieee_div_2_8i_rv64gc fdqh_ieee_div_2_4i_rv64gc
|
||||
DIVCOPIES 32'd8
|
||||
|
||||
deriv fdqh_ieee_div_4_1i_rv64gc fdqh_ieee_div_4_1_rv64gc
|
||||
IDIV_ON_FPU 1
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#--showcommands
|
||||
|
||||
# Core settings
|
||||
--variant RV32GC # for RV32GC
|
||||
--override cpu/priv_version=1.12
|
||||
--override cpu/user_version=20191213
|
||||
# arch
|
||||
@ -38,11 +39,12 @@
|
||||
--override lr_sc_grain=8 # Za64rs requires <=64; we use native word size
|
||||
|
||||
# 64 KiB continuous huge pages supported
|
||||
--override cpu/Svpbmt=T
|
||||
--override cpu/Svnapot_page_mask=65536
|
||||
#--override cpu/Svpbmt=F
|
||||
#--override cpu/Svnapot_page_mask=65536
|
||||
|
||||
# SV39 and SV48 supported
|
||||
--override cpu/Sv_modes=768
|
||||
# SV32 supported
|
||||
--override cpu/Sv_modes=3
|
||||
#--showoverrides
|
||||
|
||||
--override cpu/Svinval=T
|
||||
|
||||
@ -59,7 +61,7 @@
|
||||
|
||||
--override cpu/reset_address=0x80000000
|
||||
|
||||
--override cpu/unaligned=T # Zicclsm (should be true)
|
||||
--override cpu/unaligned=F # Zicclsm (should be true)
|
||||
--override cpu/ignore_non_leaf_DAU=1
|
||||
--override cpu/wfi_is_nop=T
|
||||
--override cpu/misa_Extensions_mask=0x0 # MISA not writable
|
||||
@ -74,7 +76,7 @@
|
||||
--override cpu/PMP_undefined=T
|
||||
|
||||
# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
|
||||
--override cpu/mstatus_fs_mode=rvfs_write_nz
|
||||
--override cpu/mstatus_fs_mode=write_1
|
||||
|
||||
# PMA Settings
|
||||
# 'r': read access allowed
|
117
config/rv64gc/imperas.ic
Normal file
117
config/rv64gc/imperas.ic
Normal file
@ -0,0 +1,117 @@
|
||||
# imperas.ic
|
||||
# Initialization file for ImperasDV lock step simulation
|
||||
# David_Harris@hmc.edu 15 August 2024
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
|
||||
#--mpdconsole
|
||||
#--gdbconsole
|
||||
#--showoverrides
|
||||
#--showcommands
|
||||
|
||||
# Core settings
|
||||
--override cpu/priv_version=1.12
|
||||
--override cpu/user_version=20191213
|
||||
# arch
|
||||
--override cpu/mimpid=0x100
|
||||
--override cpu/mvendorid=0x602
|
||||
--override cpu/marchid=0x24
|
||||
--override refRoot/cpu/tvec_align=64
|
||||
--override refRoot/cpu/envcfg_mask=1 # dh 1/26/24 this should be deleted when ImperasDV is updated to allow envcfg.FIOM to be written
|
||||
|
||||
# bit manipulation
|
||||
--override cpu/add_Extensions=B
|
||||
--override cpu/bitmanip_version=1.0.0
|
||||
--override cpu/misa_B_Zba_Zbb_Zbs=T
|
||||
|
||||
# More extensions
|
||||
--override cpu/Zcb=T
|
||||
--override cpu/Zicond=T
|
||||
--override cpu/Zfh=T
|
||||
--override cpu/Zfa=T
|
||||
|
||||
# Cache block operations
|
||||
--override cpu/Zicbom=T
|
||||
--override cpu/Zicbop=T
|
||||
--override cpu/Zicboz=T
|
||||
--override cmomp_bytes=64 # Zic64b
|
||||
--override cmoz_bytes=64 # Zic64b
|
||||
--override lr_sc_grain=8 # Za64rs requires <=64; we use native word size
|
||||
|
||||
# 64 KiB continuous huge pages supported
|
||||
--override cpu/Svpbmt=T
|
||||
--override cpu/Svnapot_page_mask=65536
|
||||
|
||||
# SV39 and SV48 supported
|
||||
--override cpu/Sv_modes=768
|
||||
|
||||
--override cpu/Svinval=T
|
||||
|
||||
|
||||
# clarify
|
||||
#--override refRoot/cpu/mtvec_sext=F
|
||||
|
||||
--override cpu/tval_ii_code=T
|
||||
|
||||
#--override cpu/time_undefined=T
|
||||
#--override cpu/cycle_undefined=T
|
||||
#--override cpu/instret_undefined=T
|
||||
#--override cpu/hpmcounter_undefined=T
|
||||
|
||||
--override cpu/reset_address=0x80000000
|
||||
|
||||
--override cpu/unaligned=T # Zicclsm (should be true)
|
||||
--override cpu/ignore_non_leaf_DAU=1
|
||||
--override cpu/wfi_is_nop=T
|
||||
--override cpu/misa_Extensions_mask=0x0 # MISA not writable
|
||||
--override cpu/Sstc=T
|
||||
|
||||
# Enable SVADU hardware update of A/D bits when menvcfg.ADUE=1
|
||||
--override cpu/Svadu=T
|
||||
#--override cpu/updatePTEA=F
|
||||
#--override cpu/updatePTED=F
|
||||
|
||||
--override cpu/PMP_registers=16
|
||||
--override cpu/PMP_undefined=T
|
||||
|
||||
# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception
|
||||
--override cpu/mstatus_fs_mode=write_1
|
||||
|
||||
# PMA Settings
|
||||
# 'r': read access allowed
|
||||
# 'w': write access allowed
|
||||
# 'x': execute access allowed
|
||||
# 'a': aligned access required
|
||||
# 'A': atomic instructions NOT allowed (actually USER1 privilege needed)
|
||||
# 'P': push/pop instructions NOT allowed (actually USER2 privilege needed)
|
||||
# '1': 1-byte accesses allowed
|
||||
# '2': 2-byte accesses allowed
|
||||
# '4': 4-byte accesses allowed
|
||||
# '8': 8-byte accesses allowed
|
||||
# '-', space: ignored (use for input string formatting).
|
||||
#
|
||||
# SVxx Memory 0x0000000000 0x7FFFFFFFFF
|
||||
#
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0xFFFFFFFFFFFFFFFFFF -attributes " ---a-- ---- " # All memory inaccessible unless defined otherwise
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0x7FFFFFFFFF -attributes " ---a-- ---- " # INITIAL
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000001000 -hi 0x0000001FFF -attributes " r-x-A- 1248 " # BOOTROM
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0000012100 -hi 0x000001211F -attributes " rw-aA- --48 " # SDC
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0002000000 -hi 0x000200FFFF -attributes " rw-aA- 1248 " # CLINT
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x000C000000 -hi 0x000FFFFFFF -attributes " rw-aA- --4- " # PLIC
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0010000000 -hi 0x0010000007 -attributes " rw-aA- 1--- " # UART0 error - 0x10000000 - 0x100000FF
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0010060000 -hi 0x00100600FF -attributes " rw-aA- --4- " # GPIO error - 0x10069000 - 0x100600FF
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0010040000 -hi 0x0010040FFF -attributes " rw-aA- --4- " # SPI error - 0x10040000 - 0x10040FFF
|
||||
--callcommand refRoot/cpu/setPMA -lo 0x0080000000 -hi 0x008FFFFFFF -attributes " rwx--- 1248 " # UNCORE_RAM
|
||||
|
||||
# Enable the Imperas instruction coverage
|
||||
#-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0
|
||||
#-override refRoot/cpu/cv/cover=basic
|
||||
#-override refRoot/cpu/cv/extensions=RV32I
|
||||
|
||||
# Add Imperas simulator application instruction tracing
|
||||
# uncomment these to provide tracing
|
||||
#--verbose --trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange # --traceafter 300000000
|
||||
#--override cpu/debugflags=6 --override cpu/verbose=1
|
||||
#--override cpu/show_c_prefix=T
|
||||
|
||||
# Store simulator output to logfile
|
||||
--output imperas.log
|
@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN
|
||||
|
||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
|
||||
|
||||
localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF));
|
||||
localparam NORMSHIFTSZDRSU = DIVb+1+NF;
|
||||
localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU);
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
/* verilator lint_off STMTDLY */
|
||||
|
@ -194,6 +194,8 @@ localparam cvw_t P = '{
|
||||
FMALEN : FMALEN,
|
||||
NORMSHIFTSZ : NORMSHIFTSZ,
|
||||
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
||||
NORMSHIFTSZDRSU : NORMSHIFTSZDRSU,
|
||||
LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU,
|
||||
LOGR : LOGR,
|
||||
RK : RK,
|
||||
FPDUR : FPDUR,
|
||||
|
@ -52,6 +52,42 @@ when 8 bytes are transferred
|
||||
|
||||
*/
|
||||
|
||||
// crc16 table to reduce byte processing time
|
||||
static const uint16_t crctable[256] = {
|
||||
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
|
||||
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
|
||||
0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
|
||||
0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
|
||||
0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
|
||||
0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
|
||||
0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
|
||||
0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
|
||||
0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
|
||||
0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
|
||||
0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
|
||||
0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
|
||||
0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
|
||||
0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
|
||||
0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
|
||||
0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
|
||||
0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
|
||||
0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
|
||||
0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
|
||||
0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
|
||||
0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
|
||||
0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
|
||||
0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
|
||||
0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
|
||||
0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
|
||||
0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
|
||||
0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
|
||||
0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
|
||||
0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
|
||||
0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
|
||||
0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
|
||||
0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
|
||||
};
|
||||
|
||||
int disk_read(BYTE * buf, LBA_t sector, UINT count) {
|
||||
uint64_t r;
|
||||
UINT i, j;
|
||||
@ -86,6 +122,7 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
|
||||
for (i = 0; i < count; i++) {
|
||||
uint16_t crc, crc_exp;
|
||||
uint64_t n = 0;
|
||||
uint64_t readCount = 0;
|
||||
|
||||
// Wait for data token
|
||||
while((r = spi_dummy()) != SD_DATA_TOKEN);
|
||||
@ -98,21 +135,45 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) {
|
||||
/* crc = crc16(crc, x); */
|
||||
/* } while (--n > 0); */
|
||||
|
||||
n = 512/8;
|
||||
do {
|
||||
// Send 8 dummy bytes (fifo should be empty)
|
||||
for (j = 0; j < 8; j++) {
|
||||
/* n = 512/8; */
|
||||
/* do { */
|
||||
/* // Send 8 dummy bytes (fifo should be empty) */
|
||||
/* for (j = 0; j < 8; j++) { */
|
||||
/* spi_sendbyte(0xff); */
|
||||
/* } */
|
||||
|
||||
/* // Reset counter. Process bytes AS THEY COME IN. */
|
||||
/* for (j = 0; j < 8; j++) { */
|
||||
/* while (!(read_reg(SPI_IP) & 2)) {} */
|
||||
/* uint8_t x = spi_readbyte(); */
|
||||
/* *p++ = x; */
|
||||
/* // crc = crc16(crc, x); */
|
||||
/* crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff; */
|
||||
/* } */
|
||||
/* } while(--n > 0); */
|
||||
|
||||
n = 512;
|
||||
// Initially fill the transmit fifo
|
||||
for (j = 0; j < 8; j++) {
|
||||
spi_sendbyte(0xff);
|
||||
}
|
||||
|
||||
|
||||
while (n > 0) {
|
||||
// Wait for bytes to be received
|
||||
while (!(read_reg(SPI_IP) & 2)) {}
|
||||
// Read byte
|
||||
uint8_t x = spi_readbyte();
|
||||
// Send another dummy byte
|
||||
if (n > 8) {
|
||||
spi_sendbyte(0xff);
|
||||
}
|
||||
|
||||
// Reset counter. Process bytes AS THEY COME IN.
|
||||
for (j = 0; j < 8; j++) {
|
||||
while (!(read_reg(SPI_IP) & 2)) {}
|
||||
uint8_t x = spi_readbyte();
|
||||
*p++ = x;
|
||||
crc = crc16(crc, x);
|
||||
}
|
||||
} while(--n > 0);
|
||||
// Place received byte into memory
|
||||
*p++ = x;
|
||||
// Update CRC16 with fast table based method
|
||||
crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff;
|
||||
n = n - 1;
|
||||
}
|
||||
|
||||
// Read CRC16 and check
|
||||
crc_exp = ((uint16_t)spi_dummy() << 8);
|
||||
|
@ -1,3 +1,32 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// spi.h
|
||||
//
|
||||
// Written: Jaocb Pease jacob.pease@okstate.edu 7/22/2024
|
||||
//
|
||||
// Purpose: Header file for interfaceing with the SPI peripheral
|
||||
//
|
||||
//
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the
|
||||
// “License”); you may not use this file except in compliance with the
|
||||
// License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work
|
||||
// distributed under the License is distributed on an “AS IS” BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
#ifndef SPI_HEADER
|
||||
#define SPI_HEADER
|
||||
|
@ -149,10 +149,32 @@ if {$FunctCoverageIndex >= 0} {
|
||||
set lst [lreplace $lst $FunctCoverageIndex $FunctCoverageIndex]
|
||||
}\
|
||||
|
||||
set FunctCoverageIndex2 [lsearch -exact $lst "--fcov2"]
|
||||
if {$FunctCoverageIndex2 >= 0} {
|
||||
set FunctCoverage 1
|
||||
set riscvISACOVsrc +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source
|
||||
|
||||
set FCdefineINCLUDE_TRACE2COV "+define+INCLUDE_TRACE2COV"
|
||||
set FCdefineCOVER_BASE_RV64I "+define+COVER_BASE_RV64I"
|
||||
set FCdefineCOVER_LEVEL_DV_PR_EXT "+define+COVER_LEVEL_DV_PR_EXT"
|
||||
# Uncomment various cover statements below to control which extensions get functional coverage
|
||||
set FCdefineCOVER_RV64I "+define+COVER_RV64I"
|
||||
#set FCdefineCOVER_RV64M "+define+COVER_RV64M"
|
||||
#set FCdefineCOVER_RV64A "+define+COVER_RV64A"
|
||||
#set FCdefineCOVER_RV64F "+define+COVER_RV64F"
|
||||
#set FCdefineCOVER_RV64D "+define+COVER_RV64D"
|
||||
#set FCdefineCOVER_RV64ZICSR "+define+COVER_RV64ZICSR"
|
||||
#set FCdefineCOVER_RV64C "+define+COVER_RV64C"
|
||||
set FCdefineIDV_INCLUDE_TRACE2COV "+define+IDV_INCLUDE_TRACE2COV"
|
||||
set FCTRACE2COV "+TRACE2COV_ENABLE=1"
|
||||
set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1"
|
||||
set lst [lreplace $lst $FunctCoverageIndex2 $FunctCoverageIndex2]
|
||||
}\
|
||||
|
||||
set LockStepIndex [lsearch -exact $lst "--lockstep"]
|
||||
# ugh. can't have more than 9 arguments passed to vsim. why? I'll have to remove --lockstep when running
|
||||
# functional coverage and imply it.
|
||||
if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0} {
|
||||
if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0 || $FunctCoverageIndex2 >= 0} {
|
||||
set lockstep 1
|
||||
|
||||
# ideally this would all be one or two variables, but questa is having a real hard time
|
||||
|
@ -11,6 +11,7 @@
|
||||
# Must edit these based on your local environment.
|
||||
export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server for Questa
|
||||
export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server
|
||||
export IMPERASD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Imperas license server
|
||||
export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin
|
||||
export DC_HOME=/cad/synopsys/SYN # Change this for your path to Synopsys Design Compiler, excluding bin
|
||||
export VCS_HOME=/cad/synopsys/vcs/U-2023.03-SP2-4 # Change this for your path to Synopsys VCS, excluding bin
|
||||
|
@ -285,6 +285,8 @@ typedef struct packed {
|
||||
int LOGCVTLEN;
|
||||
int NORMSHIFTSZ;
|
||||
int LOGNORMSHIFTSZ;
|
||||
int NORMSHIFTSZDRSU;
|
||||
int LOGNORMSHIFTSZDRSU;
|
||||
int FMALEN;
|
||||
|
||||
// division constants
|
||||
|
9
src/fpu/divremsqrt/arithrightshift.sv
Normal file
9
src/fpu/divremsqrt/arithrightshift.sv
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
module arithrightshift import cvw::*; #(parameter cvw_t P) (
|
||||
input logic signed [P.INTDIVb+3:0] shiftin,
|
||||
output logic signed [P.INTDIVb+3:0] shifted
|
||||
);
|
||||
assign shifted = $signed(shiftin) >>> P.LOGR;
|
||||
|
||||
endmodule
|
||||
|
110
src/fpu/divremsqrt/divremsqrt.sv
Normal file
110
src/fpu/divremsqrt/divremsqrt.sv
Normal file
@ -0,0 +1,110 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrt.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic XsE,
|
||||
input logic [P.NF:0] XmE, YmE,
|
||||
input logic [P.NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [P.NE+1:0] UeM,
|
||||
output logic [P.DIVb:0] UmM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM,
|
||||
output logic IntDivM,
|
||||
// integer normalization shifter signals
|
||||
output logic [P.INTDIVb+3:0] PreResultM,
|
||||
input logic [P.XLEN-1:0] PreIntResultM,
|
||||
output logic [P.DIVBLEN-1:0] IntNormShiftM
|
||||
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||
logic WZeroE; // Early termination flag
|
||||
logic [P.DURLEN:0] CyclesE; // FSM cycles
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM; // Special handling for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
|
||||
divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
|
||||
// Int-specific
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
||||
.BZeroM, .AM,
|
||||
.IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);
|
||||
|
||||
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
|
||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
|
||||
// Int-specific
|
||||
.IDivStartE, .ISpecialCaseE, .IntDivE);
|
||||
|
||||
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
|
||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
|
||||
.FirstU, .FirstUM, .FirstC, .FirstWS(WS), .FirstWC(WC));
|
||||
|
||||
divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||
.SqrtE, .SqrtM, .SpecialCaseM,
|
||||
.UmM, .WZeroE, .DivStickyM,
|
||||
// Int-specific
|
||||
.ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||
.FIntDivResultM, .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);
|
||||
|
||||
|
||||
endmodule
|
||||
|
73
src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
Normal file
73
src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
Normal file
@ -0,0 +1,73 @@
|
||||
///////////////////////////////////////////
|
||||
// divshiftcalc.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Division shift calculation
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtdivshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NF+2:0] DivUm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||
output logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt, // divsqrt shift amount
|
||||
output logic [P.NORMSHIFTSZDRSU-1:0] DivShiftIn, // divsqrt shift input
|
||||
output logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
output logic DivSubnormShiftPos // is the subnormal shift amount positive
|
||||
);
|
||||
|
||||
logic [P.LOGNORMSHIFTSZDRSU-1:0] NormShift; // normalized result shift amount
|
||||
logic [P.LOGNORMSHIFTSZDRSU-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negative)
|
||||
logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount
|
||||
|
||||
// is the result subnormal
|
||||
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
|
||||
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
|
||||
|
||||
// if the result is subnormal
|
||||
// 00000000x.xxxxxx... Exp = DivUe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
|
||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||
// Left shift amount = DivUe+NF+1-1
|
||||
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
|
||||
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
|
||||
|
||||
// if the result is normalized
|
||||
// 00000000x.xxxxxx... Exp = DivUe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivUe+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
|
||||
// inital Left shift amount = NF
|
||||
// shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
|
||||
assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF);
|
||||
|
||||
// if the shift amount is negative then don't shift (keep sticky bit)
|
||||
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
|
||||
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0;
|
||||
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
|
||||
|
||||
// pre-shift the divider result for normalization
|
||||
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}};
|
||||
endmodule
|
27
src/fpu/divremsqrt/divremsqrtearlyterm.sv
Normal file
27
src/fpu/divremsqrt/divremsqrtearlyterm.sv
Normal file
@ -0,0 +1,27 @@
|
||||
module divremsqrtearlyterm import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+3:0] D, // Q4.DIVb
|
||||
input logic [P.DIVb:0] FirstUM, // U1.DIVb
|
||||
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||
input logic SqrtE,
|
||||
output logic WZeroE
|
||||
);
|
||||
logic weq0E;
|
||||
aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
|
||||
if (P.RADIX == 2) begin: R2EarlyTerm
|
||||
logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
|
||||
logic [P.DIVb+2:0] FirstK;
|
||||
logic wfeq0E;
|
||||
logic [P.DIVb+3:0] WCF, WSF;
|
||||
|
||||
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
|
||||
assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
|
||||
assign FZeroDivE = D << 1; // F for divide
|
||||
mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
|
||||
csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
|
||||
aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
|
||||
assign WZeroE = weq0E|wfeq0E;
|
||||
end else begin
|
||||
assign WZeroE = weq0E;
|
||||
end
|
||||
endmodule
|
83
src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
Normal file
83
src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
Normal file
@ -0,0 +1,83 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtcycles.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
|
||||
// Modified: 18 April 2022
|
||||
//
|
||||
// Purpose: Determine number of cycles for divsqrt
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtfdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic IntDivE,
|
||||
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||
output logic [P.DURLEN:0] CyclesE
|
||||
);
|
||||
|
||||
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
if (P.FPSIZES == 1)
|
||||
assign Nf = P.NF;
|
||||
else if (P.FPSIZES == 2)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
1'b0: Nf = P.NF1;
|
||||
1'b1: Nf = P.NF;
|
||||
endcase
|
||||
else if (P.FPSIZES == 3)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
P.FMT: Nf = P.NF;
|
||||
P.FMT1: Nf = P.NF1;
|
||||
P.FMT2: Nf = P.NF2;
|
||||
default: Nf = 'x; // shouldn't happen
|
||||
endcase
|
||||
else if (P.FPSIZES == 4)
|
||||
always_comb
|
||||
case(FmtE)
|
||||
P.S_FMT: Nf = P.S_NF;
|
||||
P.D_FMT: Nf = P.D_NF;
|
||||
P.H_FMT: Nf = P.H_NF;
|
||||
P.Q_FMT: Nf = P.Q_NF;
|
||||
endcase
|
||||
|
||||
// Cycle logic
|
||||
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||
// Integer division needs p fractional + r integer result bits
|
||||
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
||||
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||
|
||||
always_comb begin
|
||||
FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||
|
||||
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
||||
else ResultBitsE = FPResultBitsE;
|
||||
|
||||
CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
|
||||
end
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
endmodule
|
79
src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
Normal file
79
src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
Normal file
@ -0,0 +1,79 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtexpcalc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Exponent caclulation for divide and square root
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtfdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt,
|
||||
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||
input logic Sqrt,
|
||||
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye
|
||||
output logic [P.NE+1:0] Ue // result exponent
|
||||
);
|
||||
|
||||
logic [P.NE-2:0] Bias;
|
||||
logic [P.NE+1:0] SXExp;
|
||||
logic [P.NE+1:0] SExp;
|
||||
logic [P.NE+1:0] DExp;
|
||||
|
||||
// Determine exponent bias according to the format
|
||||
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
P.FMT: Bias = (P.NE-1)'(P.BIAS);
|
||||
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
|
||||
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
|
||||
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
|
||||
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
|
||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
end
|
||||
|
||||
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
|
||||
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
|
||||
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
|
||||
// Select square root or division exponent
|
||||
assign Ue = Sqrt ? SExp : DExp;
|
||||
endmodule
|
116
src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
Normal file
116
src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
Normal file
@ -0,0 +1,116 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtpostproc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Divide/Square root postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtfdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
input logic StallM,
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+3:0] D, // Q4.DIVb
|
||||
input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
|
||||
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||
input logic SqrtE,
|
||||
input logic SqrtM, SpecialCaseM,
|
||||
input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0)
|
||||
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM,
|
||||
input logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||
input logic [P.XLEN-1:0] PreIntResultM,
|
||||
output logic [P.DIVb:0] UmM, // U1.DIVb result significand
|
||||
output logic WZeroE,
|
||||
output logic DivStickyM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM, // U/Q(XLEN.0)
|
||||
output logic [P.INTDIVb+3:0] PreResultM
|
||||
|
||||
);
|
||||
|
||||
logic [P.DIVb+3:0] Sum;
|
||||
logic [P.INTDIVb+3:0] W;
|
||||
logic [P.DIVb:0] PreUmM;
|
||||
logic NegStickyM;
|
||||
logic weq0E, WZeroM;
|
||||
logic [P.XLEN-1:0] IntDivResultM;
|
||||
logic NegQuotM; // Integer quotient is negative
|
||||
|
||||
//////////////////////////
|
||||
// Execute Stage: Detect early termination for an exact result
|
||||
//////////////////////////
|
||||
|
||||
// check for early termination on an exact result.
|
||||
divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS, .WZeroE);
|
||||
|
||||
|
||||
//////////////////////////
|
||||
// E/M Pipeline register
|
||||
//////////////////////////
|
||||
|
||||
flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
|
||||
|
||||
//////////////////////////
|
||||
// Memory Stage: Postprocessing
|
||||
//////////////////////////
|
||||
|
||||
// If the result is not exact, the sticky should be set
|
||||
assign DivStickyM = ~WZeroM & ~SpecialCaseM;
|
||||
|
||||
// Determine if sticky bit is negative *** Full sum only needed for Integer
|
||||
assign Sum = WC + WS;
|
||||
assign NegStickyM = Sum[P.DIVb+3];
|
||||
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
|
||||
|
||||
// Integer quotient or remainder correction, normalization, and special cases
|
||||
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||
logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||
logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM;
|
||||
logic [P.INTDIVb+3:0] DTrunc, SumTrunc;
|
||||
|
||||
assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
|
||||
assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];
|
||||
arithrightshift #(P) rshift(SumTrunc, W);
|
||||
|
||||
assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
|
||||
|
||||
// Integer remainder: sticky and sign correction muxes
|
||||
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
|
||||
mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);
|
||||
|
||||
// Select quotient or remainder and do normalization shift
|
||||
mux2 #(P.INTDIVb+4) presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
|
||||
intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
|
||||
mux2 #(P.INTDIVb+4) preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM);
|
||||
|
||||
divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,PreIntResultM,IntDivResultM);
|
||||
// sign extend result for W64
|
||||
if (P.XLEN==64) begin
|
||||
mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
|
||||
{{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
|
||||
W64M, FIntDivResultM);
|
||||
end else
|
||||
assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
|
||||
end
|
||||
endmodule
|
250
src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
Normal file
250
src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
Normal file
@ -0,0 +1,250 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtpreproc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtfdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [P.NF:0] Xm, Ym, // Floating-point significands
|
||||
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [P.NE+1:0] UeM, // biased exponent of result
|
||||
output logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||
// Int-specific
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
|
||||
input logic IntDivE, W64E,
|
||||
// Outputs
|
||||
output logic ISpecialCaseE,
|
||||
output logic [P.DURLEN:0] CyclesE,
|
||||
output logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||
output logic ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM,
|
||||
output logic AsM, BsM, BZeroM,
|
||||
output logic [P.XLEN-1:0] AM
|
||||
);
|
||||
|
||||
logic [P.DIVb:0] Xnorm, Dnorm;
|
||||
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
|
||||
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs
|
||||
logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic SIGNOVERFLOWE;
|
||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||
logic SignedDivE; // signed division
|
||||
logic AsE, BsE; // Signs of integer inputs
|
||||
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||
logic ALTBE;
|
||||
logic EvenExp;
|
||||
|
||||
logic [$clog2(P.RK):0] RightShiftX;
|
||||
logic [P.DIVBLEN-1:0] ZeroDiff, p;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Preprocessing
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||
logic [P.XLEN-1:0] BE, PosA, PosB;
|
||||
|
||||
// Extract inputs, signs, zero, depending on W64 mode if applicable
|
||||
assign SignedDivE = ~Funct3E[0];
|
||||
|
||||
// Source handling
|
||||
if (P.XLEN==64) begin // 64-bit, supports W64
|
||||
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
|
||||
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
|
||||
end else begin // 32 bits only
|
||||
assign AE = ForwardedSrcAE;
|
||||
assign BE = ForwardedSrcBE;
|
||||
end
|
||||
assign AZeroE = ~(|AE);
|
||||
assign BZeroE = ~(|BE);
|
||||
assign AsE = AE[P.XLEN-1] & SignedDivE;
|
||||
assign BsE = BE[P.XLEN-1] & SignedDivE;
|
||||
|
||||
// Force integer inputs to be postiive
|
||||
mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
|
||||
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||
|
||||
// Select integer or floating point inputs
|
||||
mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
|
||||
mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
|
||||
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
||||
end else begin // Int not supported
|
||||
assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
|
||||
assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
|
||||
assign NumerZeroE = XZeroE;
|
||||
end
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer & FP leading zero and normalization shift
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
// count leading zeros for Subnorm FP and to normalize integer inputs
|
||||
divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
|
||||
divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);
|
||||
|
||||
// Normalization shift: shift leading one into most significant bit
|
||||
assign Xnorm = (IFX << ell);
|
||||
assign Dnorm = (IFD << mE);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Right Shift to digit boundary
|
||||
// Determine DivXShifted (X shifted to digit boundary)
|
||||
// and nE (number of fractional digits)
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||
|
||||
// calculate number of result bits
|
||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||
assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros)
|
||||
assign SIGNOVERFLOWE = 1'b0;
|
||||
|
||||
mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
// Integer special cases (terminate immediately)
|
||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||
|
||||
// calculate right shift amount RightShiftX to complete in discrete number of steps
|
||||
if (P.RK > 1) begin // more than 1 bit per cycle
|
||||
|
||||
/* verilator lint_offf WIDTH */
|
||||
assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
|
||||
/* verilator lint_on WIDTH */
|
||||
end else begin // radix 2 1 copy doesn't require shifting
|
||||
assign DivXShifted = DivX;
|
||||
assign RightShiftX = 0;
|
||||
end
|
||||
end else begin
|
||||
assign ISpecialCaseE = 0;
|
||||
end
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Floating-Point Preprocessing
|
||||
// Extend to Q4.b format
|
||||
// shift square root to be in range [1/4, 1)
|
||||
// Normalized numbers are shifted right by 1 if the exponent is odd
|
||||
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
||||
// If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
|
||||
// Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
|
||||
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
|
||||
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraciton bits
|
||||
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
|
||||
// This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
|
||||
// Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
|
||||
// This also means only one extra fractional bit is needed becaue we never shift right by more than 1.
|
||||
// Radix Exponent odd Exponent Even
|
||||
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
||||
// 4 2(x)-4 = 4(x/2 - 1)) 2(x/2)-4 = 4(x/4 - 1)
|
||||
// Summary: PreSqrtX = r(x/2or4 - 1)
|
||||
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
||||
mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
|
||||
|
||||
/*
|
||||
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
|
||||
// This saves one bit in DIVb because there is no initial right shift.
|
||||
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
|
||||
// That is an optimization for another day.
|
||||
if (P.RADIX == 2) begin
|
||||
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||
assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||
end else begin
|
||||
logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
|
||||
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
||||
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
||||
end
|
||||
*/
|
||||
|
||||
// Initialize X for division or square root
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Selet integer or floating-point operands
|
||||
//////////////////////////////////////////////////////
|
||||
if (P.IDIV_ON_FPU) begin
|
||||
mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
|
||||
end else begin
|
||||
assign X = PreShiftX;
|
||||
end
|
||||
|
||||
// Divisior register
|
||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
|
||||
|
||||
// Floating-point exponent
|
||||
divremsqrtfdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
|
||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
|
||||
|
||||
// Number of FSM cycles (to FSM)
|
||||
divremsqrtfdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
|
||||
logic RemOpE;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
|
||||
assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
|
||||
/* verilator lint_on WIDTH */
|
||||
assign RemOpE = Funct3E[1];
|
||||
mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
|
||||
|
||||
// pipeline registers
|
||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
|
||||
flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
|
||||
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||
if (P.XLEN==64)
|
||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
183
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
183
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
@ -0,0 +1,183 @@
|
||||
|
||||
///////////////////////////////////////////
|
||||
// flags.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Post-Processing flag calculation
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtflags import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XInf, YInf, // inputs are infinity
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic XSNaN, YSNaN, // inputs are signaling NaNs
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [P.NE+1:0] Me, // exponent of the normalized sum
|
||||
// rounding
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Round, Guard, Sticky, // bits used to determine rounding
|
||||
input logic UfPlus1, // do you add one for rounding for the unbounded exponent result
|
||||
// divsqrt
|
||||
input logic DivOp, // conversion opperation?
|
||||
input logic Sqrt, // Sqrt?
|
||||
// flags
|
||||
output logic DivByZero, // divide by zero flag
|
||||
output logic Overflow, // overflow flag to select result
|
||||
output logic Invalid, // invalid flag to select the result
|
||||
output logic [4:0] PostProcFlg // flags
|
||||
);
|
||||
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic Inexact; // final inexact flag
|
||||
logic FpInexact; // floating point inexact flag
|
||||
logic DivInvalid; // integer invalid flag
|
||||
logic Underflow; // Underflow flag
|
||||
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Overflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if the result exponent is greater than or equal to the maximum exponent or
|
||||
// the shift amount is greater than the integers size (for cvt to int)
|
||||
// ShiftGtIntSz calculation:
|
||||
// a left shift of intlen+1 is still in range but any more than that is an overflow
|
||||
// inital: | 64 0's | XLEN |
|
||||
// | 64 0's | XLEN | << 64
|
||||
// | XLEN | 00000... |
|
||||
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// larger or equal if:
|
||||
// - any of the bits after the most significan 1 is one
|
||||
// - the most signifcant in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
|
||||
P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
|
||||
P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
|
||||
P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// calulate overflow flag:
|
||||
// if the result is greater than or equal to the max exponent(not taking into account sign)
|
||||
// | and the exponent isn't negitive
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Underflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate underflow flag: detecting tininess after rounding
|
||||
// the exponent is negitive
|
||||
// | the result is subnormal
|
||||
// | | the result is normal and rounded from a Subnorm
|
||||
// | | | and if given an unbounded exponent the result does not round
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
//assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Inexact
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't outputed
|
||||
//assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = FpInexact;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Invalid
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
|
||||
assign SigNaN = (XSNaN) | (YSNaN) ;
|
||||
|
||||
//invalid flag for division
|
||||
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
|
||||
|
||||
assign Invalid = SigNaN | (DivInvalid&DivOp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Divide by Zero
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// if dividing by zero and not 0/0
|
||||
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
|
||||
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// final flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
15
src/fpu/divremsqrt/divremsqrtintspecialcase.sv
Normal file
15
src/fpu/divremsqrt/divremsqrtintspecialcase.sv
Normal file
@ -0,0 +1,15 @@
|
||||
module divremsqrtintspecialcase import cvw::*; #(parameter cvw_t P) (
|
||||
input logic BZeroM,RemOpM, ALTBM,
|
||||
input logic [P.XLEN-1:0] AM,
|
||||
input signed [P.INTDIVb+3:0] PreIntResultM,
|
||||
output logic [P.XLEN-1:0] IntDivResultM
|
||||
);
|
||||
always_comb
|
||||
if (BZeroM) begin // Divide by zero
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = {(P.XLEN){1'b1}};
|
||||
end else if (ALTBM) begin // Numerator is small
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = 0;
|
||||
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
||||
endmodule
|
39
src/fpu/divremsqrt/divremsqrtlzc.sv
Normal file
39
src/fpu/divremsqrt/divremsqrtlzc.sv
Normal file
@ -0,0 +1,39 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Leading Zero Counter
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtlzc #(parameter WIDTH = 1) (
|
||||
input logic [WIDTH-1:0] num, // number to count the leading zeroes of
|
||||
output logic [$clog2(WIDTH)-1:0] ZeroCnt // the number of leading zeroes
|
||||
);
|
||||
|
||||
integer i;
|
||||
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while ((i < WIDTH) & ~num[WIDTH-1-i]) i = i+1; // search for leading one
|
||||
ZeroCnt = i[$clog2(WIDTH)-1:0];
|
||||
end
|
||||
endmodule
|
81
src/fpu/divremsqrt/divremsqrtnormshift.sv
Normal file
81
src/fpu/divremsqrt/divremsqrtnormshift.sv
Normal file
@ -0,0 +1,81 @@
|
||||
///////////////////////////////////////////
|
||||
// normshift.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: normalization shifter
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// convert shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if necessary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if necessary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is subnormal or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is subnormal:
|
||||
// | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
// fma shift
|
||||
// | 00 | Sm | << LZA output
|
||||
// .
|
||||
// - two extra bits so we can correct for an LZA error of 1 or 2
|
||||
|
||||
// divsqrt shift
|
||||
// | Nf 0's | Qm | << calculated shift amount
|
||||
// .
|
||||
|
||||
module divremsqrtnormshift import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt, // shift amount
|
||||
input logic [P.NORMSHIFTSZDRSU-1:0] ShiftIn, // number to be shifted
|
||||
output logic [P.NORMSHIFTSZDRSU-1:0] Shifted // shifted result
|
||||
);
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
endmodule
|
177
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
177
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
@ -0,0 +1,177 @@
|
||||
///////////////////////////////////////////
|
||||
// postprocess.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtpostprocess import cvw::*; #(parameter cvw_t P) (
|
||||
// general signals
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [P.NF:0] Xm, Ym, // input mantissas
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [3:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic XInf, YInf, // inputs are infinity
|
||||
input logic XNaN, YNaN, // inputs are NaN
|
||||
input logic XSNaN, YSNaN, // inputs are signaling NaNs
|
||||
input logic [1:0] PostProcSel, // select result to be written to fp register
|
||||
//fma signals
|
||||
//divide signals
|
||||
input logic DivSticky, // divider sticky bit
|
||||
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||
input logic [P.NF+2:0] DivUm, // divsqrt significand
|
||||
input logic [P.DIVBLEN-1:0] IntNormShiftM, // integer normalization left-shift amount (after pre-shifting right)
|
||||
input logic [P.INTDIVb+3:0] PreResultM, // integer result to be shifted
|
||||
input logic IntDivM,
|
||||
// final results
|
||||
output logic [P.FLEN-1:0] PostProcRes,// postprocessor final result
|
||||
output logic [4:0] PostProcFlg, // postprocesser flags
|
||||
output logic [P.XLEN-1:0] PreIntResultM // normalized integer result
|
||||
);
|
||||
|
||||
|
||||
// general signals
|
||||
logic Rs; // result sign
|
||||
logic [P.NF-1:0] Rf; // Result fraction
|
||||
logic [P.NE-1:0] Re; // Result exponent
|
||||
logic Ms; // norMalized sign
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] Mf; // norMalized fraction
|
||||
logic [P.NE+1:0] Me; // normalized exponent
|
||||
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt; // normalization shift amount
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] ShiftIn; // input to normalization shift
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
|
||||
logic Plus1; // add one to the final result?
|
||||
logic Overflow; // overflow flag used to select results
|
||||
logic Invalid; // invalid flag used to select results
|
||||
logic Guard, Round, Sticky; // bits needed to determine rounding
|
||||
logic [P.FMTBITS-1:0] OutFmt; // output format
|
||||
// division singals
|
||||
logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt; // divsqrt shif amount
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] DivShiftIn; // divsqrt shift input
|
||||
logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift
|
||||
logic DivByZero; // divide by zero flag
|
||||
logic DivResSubnorm; // is the divsqrt result subnormal
|
||||
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
|
||||
// conversion signals
|
||||
logic [P.CVTLEN+P.NF:0] CvtShiftIn; // number to be shifted for converter
|
||||
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
|
||||
logic [P.XLEN+1:0] CvtNegRes; // possibly negated integer result
|
||||
logic CvtResUf; // did the convert result underflow
|
||||
logic IntInvalid; // invalid integer flag
|
||||
// readability signals
|
||||
logic Mult; // multiply opperation
|
||||
logic Sqrt; // is the divsqrt opperation sqrt
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic CvtOp; // convertion opperation
|
||||
logic DivOp; // divider opperation
|
||||
logic InfIn; // are any of the inputs infinity
|
||||
logic NaNIn; // are any of the inputs NaN
|
||||
|
||||
// signals to help readability
|
||||
|
||||
assign DivOp = (PostProcSel == 2'b01);
|
||||
assign Sqrt = OpCtrl[0];
|
||||
|
||||
// is there an input of infinity or NaN being used
|
||||
assign InfIn = XInf|YInf;
|
||||
assign NaNIn = XNaN|YNaN;
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - otherwise: Fmt contains the percision of the output
|
||||
if (P.FPSIZES == 2)
|
||||
//assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
|
||||
assign OutFmt = Fmt;
|
||||
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
//assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
|
||||
assign OutFmt = Fmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// final claulations before shifting
|
||||
|
||||
divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
assign ShiftAmt = DivShiftAmt;
|
||||
assign ShiftIn = DivShiftIn;
|
||||
|
||||
// main normalization shift
|
||||
divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
// correct for LZA/divsqrt error
|
||||
divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
// calulate result sign used in rounding unit
|
||||
divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);
|
||||
|
||||
divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
|
||||
.Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign Rs = Ms;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
|
||||
.Xs, .OutFmt, .Sqrt,
|
||||
.NaNIn, .Round, .DivByZero,
|
||||
.Guard, .Sticky, .UfPlus1,.DivOp(1'b1), .FullRe, .Plus1,
|
||||
.Me, .Invalid, .Overflow, .PostProcFlg);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
|
||||
|
||||
divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
|
||||
.Frm, .OutFmt, .XNaN, .YNaN,
|
||||
.NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
|
||||
.XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
|
||||
|
||||
endmodule
|
268
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
268
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
@ -0,0 +1,268 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtround.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu, me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Rounder
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
module divremsqrtround import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic Ms, // normalized sign
|
||||
input logic [P.NORMSHIFTSZDRSU-1:0] Mf, // normalized fraction
|
||||
// divsqrt
|
||||
input logic DivOp, // is a division opperation being done
|
||||
input logic DivSticky, // divsqrt sticky bit
|
||||
input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent
|
||||
// outputs
|
||||
output logic [P.NE+1:0] Me, // normalied fraction
|
||||
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
|
||||
output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
output logic [P.NE-1:0] Re, // Result exponent
|
||||
output logic [P.NF-1:0] Rf, // Result fractionNormS
|
||||
output logic Sticky, // sticky bit
|
||||
output logic Plus1, // do you add one to the final result
|
||||
output logic Round, Guard // bits needed to calculate rounding
|
||||
);
|
||||
|
||||
logic UfCalcPlus1; // calculated plus one for unbounded exponent
|
||||
logic NormSticky; // normalized sum's sticky bit
|
||||
logic [P.NF-1:0] RoundFrac; // rounded fraction
|
||||
logic FpGuard, FpRound; // floating point round/guard bits
|
||||
logic FpLsbRes; // least significant bit of floating point result
|
||||
logic LsbRes; // lsb of result
|
||||
logic CalcPlus1; // calculated plus1
|
||||
logic FpPlus1; // do you add one to the fp result
|
||||
logic [P.FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - plus 1 otherwise
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - Plus 1 otherwise
|
||||
|
||||
|
||||
// determine what format the final result is in: int or fp
|
||||
|
||||
// sticky bit calculation
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
|
||||
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
|
||||
assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.S_FMT))) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.D_FMT))) |
|
||||
(|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]&(OutFmt==P.Q_FMT));
|
||||
end
|
||||
|
||||
|
||||
|
||||
// only add the Addend sticky if doing an FMA opperation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
//assign Sticky = DivSticky&DivOp | NormSticky | StickySubnorm;
|
||||
assign Sticky = DivSticky&DivOp | NormSticky;
|
||||
//assign Sticky = DivSticky&DivOp;
|
||||
|
||||
|
||||
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determint the underflow flag
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
|
||||
assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
|
||||
assign FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
|
||||
assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF] : Mf[P.NORMSHIFTSZDRSU-P.NF1];
|
||||
assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
|
||||
end
|
||||
P.FMT1: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
|
||||
end
|
||||
P.FMT2: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpGuard = 1'bx;
|
||||
FpLsbRes = 1'bx;
|
||||
FpRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpGuard = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
|
||||
FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
|
||||
FpRound = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
assign Guard = FpGuard;
|
||||
assign LsbRes = FpLsbRes;
|
||||
assign Round = FpRound;
|
||||
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (Frm)
|
||||
3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = Ms;//round down
|
||||
3'b011: CalcPlus1 = ~Ms;//round up
|
||||
3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (Frm)
|
||||
3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = Ms;//round down
|
||||
3'b011: UfCalcPlus1 = ~Ms;//round up
|
||||
3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
|
||||
assign FpPlus1 = Plus1;
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
|
||||
|
||||
|
||||
|
||||
|
||||
// place Plus1 into the proper position for the format
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
|
||||
|
||||
end else if (P.FPSIZES == 4)
|
||||
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
|
||||
|
||||
|
||||
|
||||
// trim unneeded bits from fraction
|
||||
assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];
|
||||
|
||||
|
||||
|
||||
// select the exponent
|
||||
assign Me = Ue;
|
||||
|
||||
|
||||
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
|
||||
assign Re = FullRe[P.NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
||||
|
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
@ -0,0 +1,45 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtroundsign.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Sign calculation for rounding
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // x sign
|
||||
input logic Ys, // y sign
|
||||
input logic Sqrt, // sqrt oppertion? (when using divsqrt unit)
|
||||
input logic DivOp, // is divsqrt opperation
|
||||
output logic Ms // normalized result sign
|
||||
);
|
||||
|
||||
logic Qs; // divsqrt result sign
|
||||
|
||||
// calculate divsqrt sign
|
||||
assign Qs = Xs^(Ys&~Sqrt);
|
||||
|
||||
// Select sign for rounding calulation
|
||||
assign Ms = (Qs&DivOp);
|
||||
|
||||
endmodule
|
94
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
94
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
@ -0,0 +1,94 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtshiftcorrection.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: shift correction
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NORMSHIFTSZDRSU-1:0] Shifted, // the shifted sum before LZA correction
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
|
||||
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
||||
//fma
|
||||
//input logic FmaOp, // is it an fma opperation
|
||||
//input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
//input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
|
||||
//input logic FmaSZero,
|
||||
// output
|
||||
//output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
||||
output logic [P.NORMSHIFTSZDRSU-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [P.NE+1:0] Ue // corrected exponent for divider
|
||||
);
|
||||
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
|
||||
logic [P.NORMSHIFTSZDRSU-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
|
||||
logic ResSubnorm; // is the result Subnormal
|
||||
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
|
||||
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
|
||||
|
||||
// LZA correction
|
||||
assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];
|
||||
|
||||
// correct the shifting error caused by the LZA
|
||||
// - the only possible mantissa for a plus two is all zeroes
|
||||
// - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
//mux2 #(P.NORMSHIFTSZDRSU-2) lzacorrmux(Shifted[P.NORMSHIFTSZDRSU-3:0], Shifted[P.NORMSHIFTSZDRSU-2:1], LZAPlus1, CorrSumShifted);
|
||||
|
||||
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
|
||||
// condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
|
||||
assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
|
||||
//assign LeftShiftQm = ((DivUe==1));
|
||||
assign CorrQm0 = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}};
|
||||
assign CorrQm1 = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};
|
||||
mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||
|
||||
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
|
||||
always_comb
|
||||
//if(FmaOp) Mf = {CorrSumShifted, {P.NORMSHIFTSZDRSU-(3*P.NF+4){1'b0}}};
|
||||
//if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
|
||||
if (~DivResSubnorm) Mf = CorrQmShifted;
|
||||
else Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
|
||||
|
||||
// Determine sum's exponent
|
||||
// main exponent issues:
|
||||
// - LZA was one too large
|
||||
// - LZA was two too large
|
||||
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 1
|
||||
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
|
||||
// if plus1 If plus2 kill if the result Zero or actually subnormal
|
||||
// | | |
|
||||
//assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
|
||||
|
||||
// recalculate if the result is subnormal after LZA correction
|
||||
//assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZDRSU-2]&~Shifted[P.NORMSHIFTSZDRSU-1];
|
||||
|
||||
// the quotent is in the range [.5,2) if there is no early termination
|
||||
// if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
||||
assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
//assign Ue = (DivResSubnorm ) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
endmodule
|
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
@ -0,0 +1,240 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtspecialcase.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: special case selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [P.NF:0] Xm, Ym, // input significand's
|
||||
input logic XNaN, YNaN, // are the inputs NaN
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // are any inputs infinity
|
||||
input logic NaNIn, // are any input NaNs
|
||||
input logic XInf, YInf, // are X or Y inifnity
|
||||
input logic XZero, // is X zero
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Rs, // the result's sign
|
||||
input logic Invalid, Overflow, // flags to choose the result
|
||||
input logic [P.NE-1:0] Re, // Result exponent
|
||||
input logic [P.NE+1:0] FullRe, // Result full exponent
|
||||
input logic [P.NF-1:0] Rf, // Result fraction
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivByZero, // divide by zero flag
|
||||
// outputs
|
||||
output logic [P.FLEN-1:0] PostProcRes // final result
|
||||
);
|
||||
|
||||
logic [P.FLEN-1:0] XNaNRes; // X is NaN result
|
||||
logic [P.FLEN-1:0] YNaNRes; // Y is NaN result
|
||||
logic [P.FLEN-1:0] InvalidRes; // Invalid result result
|
||||
logic [P.FLEN-1:0] UfRes; // underflowed result result
|
||||
logic [P.FLEN-1:0] OfRes; // overflowed result result
|
||||
logic [P.FLEN-1:0] NormRes; // normal result
|
||||
logic OfResMax; // does the of result output maximum norm fp number
|
||||
logic KillRes; // kill the result for underflow
|
||||
logic SelOfRes; // should the overflow result be selected
|
||||
|
||||
|
||||
// does the overflow result output the maximum normalized floating point number
|
||||
// output infinity if the input is infinity
|
||||
assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
|
||||
|
||||
// select correct outputs for special cases
|
||||
if (P.FPSIZES == 1) begin
|
||||
//NaN res selection depending on standard
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = {Rs, Re, Rf};
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
|
||||
always_comb
|
||||
if(OutFmt)
|
||||
if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
|
||||
else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
else
|
||||
if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
|
||||
else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
P.FMT1: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
end
|
||||
P.FMT2: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
|
||||
YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = (P.FLEN)'(0);
|
||||
YNaNRes = (P.FLEN)'(0);
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end else begin
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end
|
||||
OfRes = (P.FLEN)'(0);
|
||||
UfRes = (P.FLEN)'(0);
|
||||
NormRes = (P.FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
2'h1: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
|
||||
YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
|
||||
YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
|
||||
YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
|
||||
// zero is exact if dividing by infinity so don't add 1
|
||||
UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// determine if you shoould kill the res - Cvt
|
||||
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
|
||||
|
||||
// calculate if the overflow result should be selected
|
||||
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
|
||||
|
||||
// output infinity with result sign if divide by zero
|
||||
if(P.IEEE754)
|
||||
always_comb
|
||||
if(XNaN) PostProcRes = XNaNRes;
|
||||
else if(YNaN) PostProcRes = YNaNRes;
|
||||
else if(Invalid) PostProcRes = InvalidRes;
|
||||
else if(SelOfRes) PostProcRes = OfRes;
|
||||
else if(KillRes) PostProcRes = UfRes;
|
||||
else PostProcRes = NormRes;
|
||||
else
|
||||
always_comb
|
||||
if(NaNIn|Invalid) PostProcRes = InvalidRes;
|
||||
else if(SelOfRes) PostProcRes = OfRes;
|
||||
else if(KillRes) PostProcRes = UfRes;
|
||||
else PostProcRes = NormRes;
|
||||
|
||||
endmodule
|
102
src/fpu/divremsqrt/drsu.sv
Normal file
102
src/fpu/divremsqrt/drsu.sv
Normal file
@ -0,0 +1,102 @@
|
||||
///////////////////////////////////////////
|
||||
// drsu.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module drsu import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic XsE, YsE,
|
||||
input logic [P.NF:0] XmE, YmE,
|
||||
input logic [P.NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic XSNaNE, YSNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
input logic [2:0] Frm,
|
||||
input logic [3:0] OpCtrl,
|
||||
input logic [1:0] PostProcSel,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [P.FLEN-1:0] FResM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM,
|
||||
output logic [4:0] FlgM
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
logic [P.DIVb:0] UmM;
|
||||
logic [P.NF+2:0] UmMexact; //U1.NF+2
|
||||
logic [P.NE+1:0] UeM;
|
||||
logic DivStickyM;
|
||||
logic [P.INTDIVb+3:0] PreResultM;
|
||||
logic [P.XLEN-1:0] PreIntResultM;
|
||||
logic [P.DIVBLEN-1:0] IntNormShiftM;
|
||||
|
||||
divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
|
||||
.XeE, .YeE, .SqrtE, .SqrtM,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE,
|
||||
.XNaNE, .YNaNE,
|
||||
.FDivStartE, .IDivStartE, .W64E,
|
||||
.StallM, .DivStickyM, .FDivBusyE, .UeM,
|
||||
.UmM,
|
||||
.FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
|
||||
.Funct3E, .IntDivE, .FIntDivResultM, .IntDivM,
|
||||
.FDivDoneE, .IFDivStartE, .IntNormShiftM, .PreIntResultM, .PreResultM);
|
||||
assign UmMexact = UmM[P.DIVb:P.DIVb-(P.NF+3-1)]; // grabbing top 1+(NF+2) msbs
|
||||
divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl, .IntDivM,
|
||||
.XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
|
||||
.YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
|
||||
.PreIntResultM, .PreResultM, .IntNormShiftM);
|
||||
endmodule
|
||||
|
37
src/fpu/divremsqrt/intrightshift.sv
Normal file
37
src/fpu/divremsqrt/intrightshift.sv
Normal file
@ -0,0 +1,37 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtpostproc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Divide/Square root postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module intrightshift import cvw::*; #(parameter cvw_t P) (
|
||||
input logic signed [P.INTDIVb+3:0] shiftin,
|
||||
input logic [P.DIVBLEN-1:0] shiftamt,
|
||||
output logic signed [P.INTDIVb+3:0] shifted
|
||||
);
|
||||
assign shifted = shiftin >> shiftamt;
|
||||
|
||||
endmodule
|
@ -169,12 +169,17 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi);
|
||||
CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW;
|
||||
CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW;
|
||||
CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW;
|
||||
CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW;
|
||||
CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[P.XLEN-1:0];
|
||||
// user CSRs
|
||||
CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW;
|
||||
CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW;
|
||||
CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW};
|
||||
|
||||
if (P.XLEN == 32) begin
|
||||
CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrsr.MSTATUSH_REGW;
|
||||
CSRArray[12'h31A] = testbench.dut.core.priv.priv.csr.csrm.MENVCFGH_REGW;
|
||||
CSRArray[12'h15D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[63:32];
|
||||
end
|
||||
end else begin // hold the old value if the pipeline is stalled.
|
||||
|
||||
// PMP CFG 3A0 to 3AF
|
||||
|
1682
testbench/testbench-fp.sv
Normal file
1682
testbench/testbench-fp.sv
Normal file
File diff suppressed because it is too large
Load Diff
@ -762,7 +762,7 @@ end
|
||||
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org"));
|
||||
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv"));
|
||||
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GCK"));
|
||||
void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 56));
|
||||
void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, XLEN==64 ? 56 : 34));
|
||||
void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6));
|
||||
|
||||
if(elffilename == "buildroot") filename = "";
|
||||
@ -824,15 +824,25 @@ end
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME
|
||||
|
||||
if (P.XLEN == 32) begin
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hB80)); // MCYCLEH
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC82)); // INSTRETH
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH
|
||||
void'(rvviRefCsrSetVolatile(0, 32'hC81)); // TIMEH
|
||||
end
|
||||
// User HPMCOUNTER3 - HPMCOUNTER31
|
||||
for (iter='hC03; iter<='hC1F; iter++) begin
|
||||
void'(rvviRefCsrSetVolatile(0, iter)); // HPMCOUNTERx
|
||||
if (P.XLEN == 32)
|
||||
void'(rvviRefCsrSetVolatile(0, iter+128)); // HPMCOUNTERxH
|
||||
end
|
||||
|
||||
// Machine MHPMCOUNTER3 - MHPMCOUNTER31
|
||||
for (iter='hB03; iter<='hB1F; iter++) begin
|
||||
void'(rvviRefCsrSetVolatile(0, iter)); // MHPMCOUNTERx
|
||||
if (P.XLEN == 32)
|
||||
void'(rvviRefCsrSetVolatile(0, iter+128)); // MHPMCOUNTERxH
|
||||
end
|
||||
|
||||
// cannot predict this register due to latency between
|
||||
|
File diff suppressed because it is too large
Load Diff
639
testbench/tests-fp.vh
Normal file
639
testbench/tests-fp.vh
Normal file
@ -0,0 +1,639 @@
|
||||
//////////////////////////////////////////
|
||||
// tests0fo.vh
|
||||
//
|
||||
// Written: Katherine Parry 2022
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: List of floating-point tests to apply
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-3 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define PATH "../../tests/fp/vectors/"
|
||||
`define ADD_OPCTRL 4'b0110
|
||||
`define MUL_OPCTRL 4'b0100
|
||||
`define SUB_OPCTRL 4'b0111
|
||||
`define FMA_OPCTRL 4'b0000
|
||||
`define DIV_OPCTRL 4'b0000
|
||||
`define SQRT_OPCTRL 4'b0001
|
||||
`define LE_OPCTRL 4'b0011
|
||||
`define LT_OPCTRL 4'b0001
|
||||
`define EQ_OPCTRL 4'b0010
|
||||
`define TO_UI_OPCTRL 4'b0000
|
||||
`define TO_I_OPCTRL 4'b0001
|
||||
`define TO_UL_OPCTRL 4'b0010
|
||||
`define TO_L_OPCTRL 4'b0011
|
||||
`define FROM_UI_OPCTRL 4'b0100
|
||||
`define FROM_I_OPCTRL 4'b0101
|
||||
`define FROM_UL_OPCTRL 4'b0110
|
||||
`define FROM_L_OPCTRL 4'b0111
|
||||
`define INTREMU_OPCTRL 4'b1001
|
||||
`define INTREM_OPCTRL 4'b1010
|
||||
`define INTDIV_OPCTRL 4'b1011
|
||||
`define INTDIVW_OPCTRL 4'b1100
|
||||
`define INTDIVU_OPCTRL 4'b1101
|
||||
`define INTREMW_OPCTRL 4'b1110
|
||||
`define INTREMUW_OPCTRL 4'b1111
|
||||
`define INTDIVUW_OPCTRL 4'b1000
|
||||
`define RNE 3'b000
|
||||
`define RZ 3'b001
|
||||
`define RU 3'b011
|
||||
`define RD 3'b010
|
||||
`define RNM 3'b100
|
||||
`define FMAUNIT 2
|
||||
`define DIVUNIT 1
|
||||
`define CVTINTUNIT 0
|
||||
`define CVTFPUNIT 4
|
||||
`define CMPUNIT 3
|
||||
`define DIVREMSQRTUNIT 5
|
||||
`define INTDIVUNIT 6
|
||||
|
||||
string f16rv32cvtint[] = '{
|
||||
"ui32_to_f16_rne.tv",
|
||||
"ui32_to_f16_rz.tv",
|
||||
"ui32_to_f16_ru.tv",
|
||||
"ui32_to_f16_rd.tv",
|
||||
"ui32_to_f16_rnm.tv",
|
||||
"i32_to_f16_rne.tv",
|
||||
"i32_to_f16_rz.tv",
|
||||
"i32_to_f16_ru.tv",
|
||||
"i32_to_f16_rd.tv",
|
||||
"i32_to_f16_rnm.tv",
|
||||
"f16_to_ui32_rne.tv",
|
||||
"f16_to_ui32_rz.tv",
|
||||
"f16_to_ui32_ru.tv",
|
||||
"f16_to_ui32_rd.tv",
|
||||
"f16_to_ui32_rnm.tv",
|
||||
"f16_to_i32_rne.tv",
|
||||
"f16_to_i32_rz.tv",
|
||||
"f16_to_i32_ru.tv",
|
||||
"f16_to_i32_rd.tv",
|
||||
"f16_to_i32_rnm.tv"
|
||||
};
|
||||
|
||||
string f16rv64cvtint[] = '{
|
||||
"ui64_to_f16_rne.tv",
|
||||
"ui64_to_f16_rz.tv",
|
||||
"ui64_to_f16_ru.tv",
|
||||
"ui64_to_f16_rd.tv",
|
||||
"ui64_to_f16_rnm.tv",
|
||||
"i64_to_f16_rne.tv",
|
||||
"i64_to_f16_rz.tv",
|
||||
"i64_to_f16_ru.tv",
|
||||
"i64_to_f16_rd.tv",
|
||||
"i64_to_f16_rnm.tv",
|
||||
"f16_to_ui64_rne.tv",
|
||||
"f16_to_ui64_rz.tv",
|
||||
"f16_to_ui64_ru.tv",
|
||||
"f16_to_ui64_rd.tv",
|
||||
"f16_to_ui64_rnm.tv",
|
||||
"f16_to_i64_rne.tv",
|
||||
"f16_to_i64_rz.tv",
|
||||
"f16_to_i64_ru.tv",
|
||||
"f16_to_i64_rd.tv",
|
||||
"f16_to_i64_rnm.tv"
|
||||
};
|
||||
|
||||
string f32rv32cvtint[] = '{
|
||||
"ui32_to_f32_rne.tv",
|
||||
"ui32_to_f32_rz.tv",
|
||||
"ui32_to_f32_ru.tv",
|
||||
"ui32_to_f32_rd.tv",
|
||||
"ui32_to_f32_rnm.tv",
|
||||
"i32_to_f32_rne.tv",
|
||||
"i32_to_f32_rz.tv",
|
||||
"i32_to_f32_ru.tv",
|
||||
"i32_to_f32_rd.tv",
|
||||
"i32_to_f32_rnm.tv",
|
||||
"f32_to_ui32_rne.tv",
|
||||
"f32_to_ui32_rz.tv",
|
||||
"f32_to_ui32_ru.tv",
|
||||
"f32_to_ui32_rd.tv",
|
||||
"f32_to_ui32_rnm.tv",
|
||||
"f32_to_i32_rne.tv",
|
||||
"f32_to_i32_rz.tv",
|
||||
"f32_to_i32_ru.tv",
|
||||
"f32_to_i32_rd.tv",
|
||||
"f32_to_i32_rnm.tv"
|
||||
};
|
||||
|
||||
string f32rv64cvtint[] = '{
|
||||
"ui64_to_f32_rne.tv",
|
||||
"ui64_to_f32_rz.tv",
|
||||
"ui64_to_f32_ru.tv",
|
||||
"ui64_to_f32_rd.tv",
|
||||
"ui64_to_f32_rnm.tv",
|
||||
"i64_to_f32_rne.tv",
|
||||
"i64_to_f32_rz.tv",
|
||||
"i64_to_f32_ru.tv",
|
||||
"i64_to_f32_rd.tv",
|
||||
"i64_to_f32_rnm.tv",
|
||||
"f32_to_ui64_rne.tv",
|
||||
"f32_to_ui64_rz.tv",
|
||||
"f32_to_ui64_ru.tv",
|
||||
"f32_to_ui64_rd.tv",
|
||||
"f32_to_ui64_rnm.tv",
|
||||
"f32_to_i64_rne.tv",
|
||||
"f32_to_i64_rz.tv",
|
||||
"f32_to_i64_ru.tv",
|
||||
"f32_to_i64_rd.tv",
|
||||
"f32_to_i64_rnm.tv"
|
||||
};
|
||||
|
||||
|
||||
string f64rv32cvtint[] = '{
|
||||
"ui32_to_f64_rne.tv",
|
||||
"ui32_to_f64_rz.tv",
|
||||
"ui32_to_f64_ru.tv",
|
||||
"ui32_to_f64_rd.tv",
|
||||
"ui32_to_f64_rnm.tv",
|
||||
"i32_to_f64_rne.tv",
|
||||
"i32_to_f64_rz.tv",
|
||||
"i32_to_f64_ru.tv",
|
||||
"i32_to_f64_rd.tv",
|
||||
"i32_to_f64_rnm.tv",
|
||||
"f64_to_ui32_rne.tv",
|
||||
"f64_to_ui32_rz.tv",
|
||||
"f64_to_ui32_ru.tv",
|
||||
"f64_to_ui32_rd.tv",
|
||||
"f64_to_ui32_rnm.tv",
|
||||
"f64_to_i32_rne.tv",
|
||||
"f64_to_i32_rz.tv",
|
||||
"f64_to_i32_ru.tv",
|
||||
"f64_to_i32_rd.tv",
|
||||
"f64_to_i32_rnm.tv"
|
||||
};
|
||||
|
||||
string f64rv64cvtint[] = '{
|
||||
"ui64_to_f64_rne.tv",
|
||||
"ui64_to_f64_rz.tv",
|
||||
"ui64_to_f64_ru.tv",
|
||||
"ui64_to_f64_rd.tv",
|
||||
"ui64_to_f64_rnm.tv",
|
||||
"i64_to_f64_rne.tv",
|
||||
"i64_to_f64_rz.tv",
|
||||
"i64_to_f64_ru.tv",
|
||||
"i64_to_f64_rd.tv",
|
||||
"i64_to_f64_rnm.tv",
|
||||
"f64_to_ui64_rne.tv",
|
||||
"f64_to_ui64_rz.tv",
|
||||
"f64_to_ui64_ru.tv",
|
||||
"f64_to_ui64_rd.tv",
|
||||
"f64_to_ui64_rnm.tv",
|
||||
"f64_to_i64_rne.tv",
|
||||
"f64_to_i64_rz.tv",
|
||||
"f64_to_i64_ru.tv",
|
||||
"f64_to_i64_rd.tv",
|
||||
"f64_to_i64_rnm.tv"
|
||||
};
|
||||
|
||||
string f128rv64cvtint[] = '{
|
||||
"ui64_to_f128_rne.tv",
|
||||
"ui64_to_f128_rz.tv",
|
||||
"ui64_to_f128_ru.tv",
|
||||
"ui64_to_f128_rd.tv",
|
||||
"ui64_to_f128_rnm.tv",
|
||||
"i64_to_f128_rne.tv",
|
||||
"i64_to_f128_rz.tv",
|
||||
"i64_to_f128_ru.tv",
|
||||
"i64_to_f128_rd.tv",
|
||||
"i64_to_f128_rnm.tv",
|
||||
"f128_to_ui64_rne.tv",
|
||||
"f128_to_ui64_rz.tv",
|
||||
"f128_to_ui64_ru.tv",
|
||||
"f128_to_ui64_rd.tv",
|
||||
"f128_to_ui64_rnm.tv",
|
||||
"f128_to_i64_rne.tv",
|
||||
"f128_to_i64_rz.tv",
|
||||
"f128_to_i64_ru.tv",
|
||||
"f128_to_i64_rd.tv",
|
||||
"f128_to_i64_rnm.tv"
|
||||
};
|
||||
|
||||
string f128rv32cvtint[] = '{
|
||||
"ui32_to_f128_rne.tv",
|
||||
"ui32_to_f128_rz.tv",
|
||||
"ui32_to_f128_ru.tv",
|
||||
"ui32_to_f128_rd.tv",
|
||||
"ui32_to_f128_rnm.tv",
|
||||
"i32_to_f128_rne.tv",
|
||||
"i32_to_f128_rz.tv",
|
||||
"i32_to_f128_ru.tv",
|
||||
"i32_to_f128_rd.tv",
|
||||
"i32_to_f128_rnm.tv",
|
||||
"f128_to_ui32_rne.tv",
|
||||
"f128_to_ui32_rz.tv",
|
||||
"f128_to_ui32_ru.tv",
|
||||
"f128_to_ui32_rd.tv",
|
||||
"f128_to_ui32_rnm.tv",
|
||||
"f128_to_i32_rne.tv",
|
||||
"f128_to_i32_rz.tv",
|
||||
"f128_to_i32_ru.tv",
|
||||
"f128_to_i32_rd.tv",
|
||||
"f128_to_i32_rnm.tv"
|
||||
};
|
||||
|
||||
string f32f16cvt[] = '{
|
||||
"f32_to_f16_rne.tv",
|
||||
"f32_to_f16_rz.tv",
|
||||
"f32_to_f16_ru.tv",
|
||||
"f32_to_f16_rd.tv",
|
||||
"f32_to_f16_rnm.tv",
|
||||
"f16_to_f32_rne.tv",
|
||||
"f16_to_f32_rz.tv",
|
||||
"f16_to_f32_ru.tv",
|
||||
"f16_to_f32_rd.tv",
|
||||
"f16_to_f32_rnm.tv"
|
||||
};
|
||||
|
||||
string f64f16cvt[] = '{
|
||||
"f64_to_f16_rne.tv",
|
||||
"f64_to_f16_rz.tv",
|
||||
"f64_to_f16_ru.tv",
|
||||
"f64_to_f16_rd.tv",
|
||||
"f64_to_f16_rnm.tv",
|
||||
"f16_to_f64_rne.tv",
|
||||
"f16_to_f64_rz.tv",
|
||||
"f16_to_f64_ru.tv",
|
||||
"f16_to_f64_rd.tv",
|
||||
"f16_to_f64_rnm.tv"
|
||||
};
|
||||
|
||||
string f128f16cvt[] = '{
|
||||
"f128_to_f16_rne.tv",
|
||||
"f128_to_f16_rz.tv",
|
||||
"f128_to_f16_ru.tv",
|
||||
"f128_to_f16_rd.tv",
|
||||
"f128_to_f16_rnm.tv",
|
||||
"f16_to_f128_rne.tv",
|
||||
"f16_to_f128_rz.tv",
|
||||
"f16_to_f128_ru.tv",
|
||||
"f16_to_f128_rd.tv",
|
||||
"f16_to_f128_rnm.tv"
|
||||
};
|
||||
|
||||
string f64f32cvt[] = '{
|
||||
"f64_to_f32_rne.tv",
|
||||
"f64_to_f32_rz.tv",
|
||||
"f64_to_f32_ru.tv",
|
||||
"f64_to_f32_rd.tv",
|
||||
"f64_to_f32_rnm.tv",
|
||||
"f32_to_f64_rne.tv",
|
||||
"f32_to_f64_rz.tv",
|
||||
"f32_to_f64_ru.tv",
|
||||
"f32_to_f64_rd.tv",
|
||||
"f32_to_f64_rnm.tv"
|
||||
};
|
||||
|
||||
string f128f32cvt[] = '{
|
||||
"f128_to_f32_rne.tv",
|
||||
"f128_to_f32_rz.tv",
|
||||
"f128_to_f32_ru.tv",
|
||||
"f128_to_f32_rd.tv",
|
||||
"f128_to_f32_rnm.tv",
|
||||
"f32_to_f128_rne.tv",
|
||||
"f32_to_f128_rz.tv",
|
||||
"f32_to_f128_ru.tv",
|
||||
"f32_to_f128_rd.tv",
|
||||
"f32_to_f128_rnm.tv"
|
||||
};
|
||||
|
||||
string f128f64cvt[] = '{
|
||||
"f128_to_f64_rne.tv",
|
||||
"f128_to_f64_rz.tv",
|
||||
"f128_to_f64_ru.tv",
|
||||
"f128_to_f64_rd.tv",
|
||||
"f128_to_f64_rnm.tv",
|
||||
"f64_to_f128_rne.tv",
|
||||
"f64_to_f128_rz.tv",
|
||||
"f64_to_f128_ru.tv",
|
||||
"f64_to_f128_rd.tv",
|
||||
"f64_to_f128_rnm.tv"
|
||||
};
|
||||
|
||||
string f16add[] = '{
|
||||
"f16_add_rne.tv",
|
||||
"f16_add_rz.tv",
|
||||
"f16_add_ru.tv",
|
||||
"f16_add_rd.tv",
|
||||
"f16_add_rnm.tv"
|
||||
};
|
||||
|
||||
string f32add[] = '{
|
||||
"f32_add_rne.tv",
|
||||
"f32_add_rz.tv",
|
||||
"f32_add_ru.tv",
|
||||
"f32_add_rd.tv",
|
||||
"f32_add_rnm.tv"
|
||||
};
|
||||
|
||||
string f64add[] = '{
|
||||
"f64_add_rne.tv",
|
||||
"f64_add_rz.tv",
|
||||
"f64_add_ru.tv",
|
||||
"f64_add_rd.tv",
|
||||
"f64_add_rnm.tv"
|
||||
};
|
||||
|
||||
string f128add[] = '{
|
||||
"f128_add_rne.tv",
|
||||
"f128_add_rz.tv",
|
||||
"f128_add_ru.tv",
|
||||
"f128_add_rd.tv",
|
||||
"f128_add_rnm.tv"
|
||||
};
|
||||
|
||||
string f16sub[] = '{
|
||||
"f16_sub_rne.tv",
|
||||
"f16_sub_rz.tv",
|
||||
"f16_sub_ru.tv",
|
||||
"f16_sub_rd.tv",
|
||||
"f16_sub_rnm.tv"
|
||||
};
|
||||
|
||||
string f32sub[] = '{
|
||||
"f32_sub_rne.tv",
|
||||
"f32_sub_rz.tv",
|
||||
"f32_sub_ru.tv",
|
||||
"f32_sub_rd.tv",
|
||||
"f32_sub_rnm.tv"
|
||||
};
|
||||
|
||||
string f64sub[] = '{
|
||||
"f64_sub_rne.tv",
|
||||
"f64_sub_rz.tv",
|
||||
"f64_sub_ru.tv",
|
||||
"f64_sub_rd.tv",
|
||||
"f64_sub_rnm.tv"
|
||||
};
|
||||
|
||||
string f128sub[] = '{
|
||||
"f128_sub_rne.tv",
|
||||
"f128_sub_rz.tv",
|
||||
"f128_sub_ru.tv",
|
||||
"f128_sub_rd.tv",
|
||||
"f128_sub_rnm.tv"
|
||||
};
|
||||
|
||||
string f16mul[] = '{
|
||||
"f16_mul_rne.tv",
|
||||
"f16_mul_rz.tv",
|
||||
"f16_mul_ru.tv",
|
||||
"f16_mul_rd.tv",
|
||||
"f16_mul_rnm.tv"
|
||||
};
|
||||
|
||||
string f32mul[] = '{
|
||||
"f32_mul_rne.tv",
|
||||
"f32_mul_rz.tv",
|
||||
"f32_mul_ru.tv",
|
||||
"f32_mul_rd.tv",
|
||||
"f32_mul_rnm.tv"
|
||||
};
|
||||
|
||||
string f64mul[] = '{
|
||||
"f64_mul_rne.tv",
|
||||
"f64_mul_rz.tv",
|
||||
"f64_mul_ru.tv",
|
||||
"f64_mul_rd.tv",
|
||||
"f64_mul_rnm.tv"
|
||||
};
|
||||
|
||||
string f128mul[] = '{
|
||||
"f128_mul_rne.tv",
|
||||
"f128_mul_rz.tv",
|
||||
"f128_mul_ru.tv",
|
||||
"f128_mul_rd.tv",
|
||||
"f128_mul_rnm.tv"
|
||||
};
|
||||
|
||||
string f16div[] = '{
|
||||
"f16_div_rne.tv",
|
||||
"f16_div_rz.tv",
|
||||
"f16_div_ru.tv",
|
||||
"f16_div_rd.tv",
|
||||
"f16_div_rnm.tv"
|
||||
};
|
||||
|
||||
string f32div[] = '{
|
||||
"f32_div_rne.tv",
|
||||
"f32_div_rz.tv",
|
||||
"f32_div_ru.tv",
|
||||
"f32_div_rd.tv",
|
||||
"f32_div_rnm.tv"
|
||||
};
|
||||
|
||||
string f64div[] = '{
|
||||
"f64_div_rne.tv",
|
||||
"f64_div_rz.tv",
|
||||
"f64_div_ru.tv",
|
||||
"f64_div_rd.tv",
|
||||
"f64_div_rnm.tv"
|
||||
};
|
||||
|
||||
string f128div[] = '{
|
||||
"f128_div_rne.tv",
|
||||
"f128_div_rz.tv",
|
||||
"f128_div_ru.tv",
|
||||
"f128_div_rd.tv",
|
||||
"f128_div_rnm.tv"
|
||||
};
|
||||
|
||||
string f16sqrt[] = '{
|
||||
"f16_sqrt_rne.tv",
|
||||
"f16_sqrt_rz.tv",
|
||||
"f16_sqrt_ru.tv",
|
||||
"f16_sqrt_rd.tv",
|
||||
"f16_sqrt_rnm.tv"
|
||||
};
|
||||
|
||||
string f32sqrt[] = '{
|
||||
"f32_sqrt_rne.tv",
|
||||
"f32_sqrt_rz.tv",
|
||||
"f32_sqrt_ru.tv",
|
||||
"f32_sqrt_rd.tv",
|
||||
"f32_sqrt_rnm.tv"
|
||||
};
|
||||
|
||||
string f64sqrt[] = '{
|
||||
"f64_sqrt_rne.tv",
|
||||
"f64_sqrt_rz.tv",
|
||||
"f64_sqrt_ru.tv",
|
||||
"f64_sqrt_rd.tv",
|
||||
"f64_sqrt_rnm.tv"
|
||||
};
|
||||
|
||||
string f128sqrt[] = '{
|
||||
"f128_sqrt_rne.tv",
|
||||
"f128_sqrt_rz.tv",
|
||||
"f128_sqrt_ru.tv",
|
||||
"f128_sqrt_rd.tv",
|
||||
"f128_sqrt_rnm.tv"
|
||||
};
|
||||
|
||||
string f16cmp[] = '{
|
||||
"f16_eq_rne.tv",
|
||||
"f16_eq_rz.tv",
|
||||
"f16_eq_ru.tv",
|
||||
"f16_eq_rd.tv",
|
||||
"f16_eq_rnm.tv",
|
||||
"f16_le_rne.tv",
|
||||
"f16_le_rz.tv",
|
||||
"f16_le_ru.tv",
|
||||
"f16_le_rd.tv",
|
||||
"f16_le_rnm.tv",
|
||||
"f16_lt_rne.tv",
|
||||
"f16_lt_rz.tv",
|
||||
"f16_lt_ru.tv",
|
||||
"f16_lt_rd.tv",
|
||||
"f16_lt_rnm.tv"
|
||||
};
|
||||
|
||||
string f32cmp[] = '{
|
||||
"f32_eq_rne.tv",
|
||||
"f32_eq_rz.tv",
|
||||
"f32_eq_ru.tv",
|
||||
"f32_eq_rd.tv",
|
||||
"f32_eq_rnm.tv",
|
||||
"f32_le_rne.tv",
|
||||
"f32_le_rz.tv",
|
||||
"f32_le_ru.tv",
|
||||
"f32_le_rd.tv",
|
||||
"f32_le_rnm.tv",
|
||||
"f32_lt_rne.tv",
|
||||
"f32_lt_rz.tv",
|
||||
"f32_lt_ru.tv",
|
||||
"f32_lt_rd.tv",
|
||||
"f32_lt_rnm.tv"
|
||||
};
|
||||
|
||||
string f64cmp[] = '{
|
||||
"f64_eq_rne.tv",
|
||||
"f64_eq_rz.tv",
|
||||
"f64_eq_ru.tv",
|
||||
"f64_eq_rd.tv",
|
||||
"f64_eq_rnm.tv",
|
||||
"f64_le_rne.tv",
|
||||
"f64_le_rz.tv",
|
||||
"f64_le_ru.tv",
|
||||
"f64_le_rd.tv",
|
||||
"f64_le_rnm.tv",
|
||||
"f64_lt_rne.tv",
|
||||
"f64_lt_rz.tv",
|
||||
"f64_lt_ru.tv",
|
||||
"f64_lt_rd.tv",
|
||||
"f64_lt_rnm.tv"
|
||||
};
|
||||
|
||||
string f128cmp[] = '{
|
||||
"f128_eq_rne.tv",
|
||||
"f128_eq_rz.tv",
|
||||
"f128_eq_ru.tv",
|
||||
"f128_eq_rd.tv",
|
||||
"f128_eq_rnm.tv",
|
||||
"f128_le_rne.tv",
|
||||
"f128_le_rz.tv",
|
||||
"f128_le_ru.tv",
|
||||
"f128_le_rd.tv",
|
||||
"f128_le_rnm.tv",
|
||||
"f128_lt_rne.tv",
|
||||
"f128_lt_rz.tv",
|
||||
"f128_lt_ru.tv",
|
||||
"f128_lt_rd.tv",
|
||||
"f128_lt_rnm.tv"
|
||||
};
|
||||
|
||||
string f16fma[] = '{
|
||||
"f16_mulAdd_rne.tv",
|
||||
"f16_mulAdd_rz.tv",
|
||||
"f16_mulAdd_ru.tv",
|
||||
"f16_mulAdd_rd.tv",
|
||||
"f16_mulAdd_rnm.tv"
|
||||
};
|
||||
|
||||
string f32fma[] = '{
|
||||
"f32_mulAdd_rne.tv",
|
||||
"f32_mulAdd_rz.tv",
|
||||
"f32_mulAdd_ru.tv",
|
||||
"f32_mulAdd_rd.tv",
|
||||
"f32_mulAdd_rnm.tv"
|
||||
};
|
||||
|
||||
string f64fma[] = '{
|
||||
"f64_mulAdd_rne.tv",
|
||||
"f64_mulAdd_rz.tv",
|
||||
"f64_mulAdd_ru.tv",
|
||||
"f64_mulAdd_rd.tv",
|
||||
"f64_mulAdd_rnm.tv"
|
||||
};
|
||||
|
||||
string f128fma[] = '{
|
||||
"f128_mulAdd_rne.tv",
|
||||
"f128_mulAdd_rz.tv",
|
||||
"f128_mulAdd_ru.tv",
|
||||
"f128_mulAdd_rd.tv",
|
||||
"f128_mulAdd_rnm.tv"
|
||||
};
|
||||
|
||||
string int64rem[] = '{
|
||||
"cvw_64_rem-01.tv"
|
||||
};
|
||||
|
||||
string int64div[] = '{
|
||||
"cvw_64_div-01.tv"
|
||||
};
|
||||
|
||||
string int64remu[] = '{
|
||||
"cvw_64_remu-01.tv"
|
||||
};
|
||||
|
||||
string int64divu[] = '{
|
||||
"cvw_64_divu-01.tv"
|
||||
};
|
||||
|
||||
string int64remw[] = '{
|
||||
"cvw_64_remw-01.tv"
|
||||
};
|
||||
|
||||
string int64remuw[] = '{
|
||||
"cvw_64_remuw-01.tv"
|
||||
};
|
||||
|
||||
string int64divuw[] = '{
|
||||
"cvw_64_divuw-01.tv"
|
||||
};
|
||||
|
||||
string int64divw[] = '{
|
||||
"cvw_64_divw-01.tv"
|
||||
};
|
||||
|
||||
string int32rem[] = '{
|
||||
"cvw_32_rem-01.tv"
|
||||
};
|
||||
|
||||
string int32div[] = '{
|
||||
"cvw_32_div-01.tv"
|
||||
};
|
||||
|
||||
string int32remu[] = '{
|
||||
"cvw_32_remu-01.tv"
|
||||
};
|
||||
|
||||
string int32divu[] = '{
|
||||
"cvw_32_divu-01.tv"
|
||||
};
|
112
tests/custom/spitest/Makefile
Normal file
112
tests/custom/spitest/Makefile
Normal file
@ -0,0 +1,112 @@
|
||||
CEXT := c
|
||||
CPPEXT := cpp
|
||||
AEXT := s
|
||||
SEXT := S
|
||||
SRCEXT := \([$(CEXT)$(AEXT)$(SEXT)]\|$(CPPEXT)\)
|
||||
OBJEXT := o
|
||||
DEPEXT := d
|
||||
SRCDIR := .
|
||||
BUILDDIR := OBJ
|
||||
|
||||
SOURCES ?= $(shell find $(SRCDIR) -type f -regex ".*\.$(SRCEXT)" | sort)
|
||||
OBJECTS := $(SOURCES:.$(CEXT)=.$(OBJEXT))
|
||||
OBJECTS := $(OBJECTS:.$(AEXT)=.$(OBJEXT))
|
||||
OBJECTS := $(OBJECTS:.$(SEXT)=.$(OBJEXT))
|
||||
OBJECTS := $(OBJECTS:.$(CPPEXT)=.$(OBJEXT))
|
||||
OBJECTS := $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(OBJECTS))
|
||||
|
||||
TARGETDIR := bin
|
||||
TARGET := $(TARGETDIR)/spitest.elf
|
||||
ROOT := ..
|
||||
LIBRARY_DIRS :=
|
||||
LIBRARY_FILES :=
|
||||
|
||||
MARCH :=-march=rv64imfdc
|
||||
MABI :=-mabi=lp64d
|
||||
LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles
|
||||
LINKER :=$(ROOT)/linker8000-0000.x
|
||||
|
||||
|
||||
AFLAGS =$(MARCH) $(MABI) -W
|
||||
CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2
|
||||
AS=riscv64-unknown-elf-as
|
||||
CC=riscv64-unknown-elf-gcc
|
||||
AR=riscv64-unknown-elf-ar
|
||||
|
||||
|
||||
#Default Make
|
||||
all: directories $(TARGET).memfile
|
||||
|
||||
#Remake
|
||||
remake: clean all
|
||||
|
||||
#Make the Directories
|
||||
directories:
|
||||
@mkdir -p $(TARGETDIR)
|
||||
@mkdir -p $(BUILDDIR)
|
||||
|
||||
clean:
|
||||
rm -rf $(BUILDDIR) $(TARGETDIR) *.memfile *.objdump
|
||||
|
||||
|
||||
#Needed for building additional library projects
|
||||
ifdef LIBRARY_DIRS
|
||||
LIBS+=${LIBRARY_DIRS:%=-L%} ${LIBRARY_FILES:%=-l%}
|
||||
INC+=${LIBRARY_DIRS:%=-I%}
|
||||
|
||||
${LIBRARY_DIRS}:
|
||||
make -C $@ -j 1
|
||||
|
||||
.PHONY: $(LIBRARY_DIRS) $(TARGET)
|
||||
endif
|
||||
|
||||
|
||||
#Pull in dependency info for *existing* .o files
|
||||
-include $(OBJECTS:.$(OBJEXT)=.$(DEPEXT))
|
||||
|
||||
#Link
|
||||
$(TARGET): $(OBJECTS) $(LIBRARY_DIRS)
|
||||
$(CC) $(LINK_FLAGS) -g -o $(TARGET) $(OBJECTS) ${LIBS} -T ${LINKER}
|
||||
|
||||
|
||||
#Compile
|
||||
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CEXT)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
|
||||
@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CEXT) > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
|
||||
@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
|
||||
|
||||
# gcc won't output dependencies for assembly files for some reason
|
||||
# most asm files don't have dependencies so the echo will work for now.
|
||||
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(AEXT)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CC) $(CFLAGS) -c -o $@ $< > $(BUILDDIR)/$*.list
|
||||
@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
|
||||
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SEXT)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
|
||||
@echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
|
||||
# C++
|
||||
$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CPPEXT)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list
|
||||
@$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CPPEXT) > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp
|
||||
@sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT)
|
||||
@rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp
|
||||
|
||||
# convert to hex
|
||||
$(TARGET).memfile: $(TARGET)
|
||||
@echo 'Making object dump file.'
|
||||
@riscv64-unknown-elf-objdump -D $< > $<.objdump
|
||||
@echo 'Making memory file'
|
||||
riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@
|
||||
extractFunctionRadix.sh $<.objdump
|
||||
mkdir -p ../work/
|
||||
cp -f $(TARGETDIR)/* ../work/
|
116
tests/custom/spitest/spi.h
Normal file
116
tests/custom/spitest/spi.h
Normal file
@ -0,0 +1,116 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// spi.h
|
||||
//
|
||||
// Written: Jaocb Pease jacob.pease@okstate.edu 7/22/2024
|
||||
//
|
||||
// Purpose: Header file for interfaceing with the SPI peripheral
|
||||
//
|
||||
//
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the
|
||||
// “License”); you may not use this file except in compliance with the
|
||||
// License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work
|
||||
// distributed under the License is distributed on an “AS IS” BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
#ifndef SPI_HEADER
|
||||
#define SPI_HEADER
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define SPI_BASE 0x13000 /* Base address of SPI device used for SDC */
|
||||
|
||||
/* register offsets */
|
||||
#define SPI_SCKDIV SPI_BASE + 0x00 /* Serial clock divisor */
|
||||
#define SPI_SCKMODE SPI_BASE + 0x04 /* Serial clock mode */
|
||||
#define SPI_CSID SPI_BASE + 0x10 /* Chip select ID */
|
||||
#define SPI_CSDEF SPI_BASE + 0x14 /* Chip select default */
|
||||
#define SPI_CSMODE SPI_BASE + 0x18 /* Chip select mode */
|
||||
#define SPI_DELAY0 SPI_BASE + 0x28 /* Delay control 0 */
|
||||
#define SPI_DELAY1 SPI_BASE + 0x2c /* Delay control 1 */
|
||||
#define SPI_FMT SPI_BASE + 0x40 /* Frame format */
|
||||
#define SPI_TXDATA SPI_BASE + 0x48 /* Tx FIFO data */
|
||||
#define SPI_RXDATA SPI_BASE + 0x4c /* Rx FIFO data */
|
||||
#define SPI_TXMARK SPI_BASE + 0x50 /* Tx FIFO [<35;39;29Mwatermark */
|
||||
#define SPI_RXMARK SPI_BASE + 0x54 /* Rx FIFO watermark */
|
||||
|
||||
/* Non-implemented
|
||||
#define SPI_FCTRL SPI_BASE + 0x60 // SPI flash interface control
|
||||
#define SPI_FFMT SPI_BASE + 0x64 // SPI flash instruction format
|
||||
*/
|
||||
#define SPI_IE SPI_BASE + 0x70 /* Interrupt Enable Register */
|
||||
#define SPI_IP SPI_BASE + 0x74 /* Interrupt Pendings Register */
|
||||
|
||||
/* delay0 bits */
|
||||
#define SIFIVE_SPI_DELAY0_CSSCK(x) ((uint32_t)(x))
|
||||
#define SIFIVE_SPI_DELAY0_CSSCK_MASK 0xffU
|
||||
#define SIFIVE_SPI_DELAY0_SCKCS(x) ((uint32_t)(x) << 16)
|
||||
#define SIFIVE_SPI_DELAY0_SCKCS_MASK (0xffU << 16)
|
||||
|
||||
/* delay1 bits */
|
||||
#define SIFIVE_SPI_DELAY1_INTERCS(x) ((uint32_t)(x))
|
||||
#define SIFIVE_SPI_DELAY1_INTERCS_MASK 0xffU
|
||||
#define SIFIVE_SPI_DELAY1_INTERXFR(x) ((uint32_t)(x) << 16)
|
||||
#define SIFIVE_SPI_DELAY1_INTERXFR_MASK (0xffU << 16)
|
||||
|
||||
/* csmode bits */
|
||||
#define SIFIVE_SPI_CSMODE_MODE_AUTO 0U
|
||||
#define SIFIVE_SPI_CSMODE_MODE_HOLD 2U
|
||||
#define SIFIVE_SPI_CSMODE_MODE_OFF 3U
|
||||
|
||||
// inline void write_reg(uintptr_t addr, uint32_t value);
|
||||
//inline uint32_t read_reg(uintptr_t addr);
|
||||
//inline void spi_sendbyte(uint8_t byte);
|
||||
//inline void waittx();
|
||||
//inline void waitrx();
|
||||
uint8_t spi_txrx(uint8_t byte);
|
||||
uint8_t spi_dummy();
|
||||
//inline uint8_t spi_readbyte();
|
||||
//uint64_t spi_read64();
|
||||
void spi_init();
|
||||
void spi_set_clock(uint32_t clkin, uint32_t clkout);
|
||||
|
||||
static inline void write_reg(uintptr_t addr, uint32_t value) {
|
||||
volatile uint32_t * loc = (volatile uint32_t *) addr;
|
||||
*loc = value;
|
||||
}
|
||||
|
||||
// Read a register
|
||||
static inline uint32_t read_reg(uintptr_t addr) {
|
||||
return *(volatile uint32_t *) addr;
|
||||
}
|
||||
|
||||
// Queues a single byte in the transfer fifo
|
||||
static inline void spi_sendbyte(uint8_t byte) {
|
||||
// Write byte to transfer fifo
|
||||
write_reg(SPI_TXDATA, byte);
|
||||
}
|
||||
|
||||
static inline void waittx() {
|
||||
while(!(read_reg(SPI_IP) & 1)) {}
|
||||
}
|
||||
|
||||
static inline void waitrx() {
|
||||
while(read_reg(SPI_IP) & 2) {}
|
||||
}
|
||||
|
||||
static inline uint8_t spi_readbyte() {
|
||||
return read_reg(SPI_RXDATA);
|
||||
}
|
||||
|
||||
#endif
|
107
tests/custom/spitest/spitest.c
Normal file
107
tests/custom/spitest/spitest.c
Normal file
@ -0,0 +1,107 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// spi.c
|
||||
//
|
||||
// Written: Jaocb Pease jacob.pease@okstate.edu 8/27/2024
|
||||
//
|
||||
// Purpose: C code to test SPI bugs
|
||||
//
|
||||
//
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the
|
||||
// “License”); you may not use this file except in compliance with the
|
||||
// License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work
|
||||
// distributed under the License is distributed on an “AS IS” BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
// implied. See the License for the specific language governing
|
||||
// permissions and limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "spi.h"
|
||||
|
||||
// Testing SPI peripheral in loopback mode
|
||||
// TODO: Need to make sure the configuration I'm using uses loopback
|
||||
// mode. This can be specified in derivlists.txt
|
||||
// TODO:
|
||||
|
||||
uint8_t spi_txrx(uint8_t byte) {
|
||||
spi_sendbyte(byte);
|
||||
waittx();
|
||||
return spi_readbyte();
|
||||
}
|
||||
|
||||
uint8_t spi_dummy() {
|
||||
return spi_txrx(0xff);
|
||||
}
|
||||
|
||||
void spi_set_clock(uint32_t clkin, uint32_t clkout) {
|
||||
uint32_t div = (clkin/(2*clkout)) - 1;
|
||||
write_reg(SPI_SCKDIV, div);
|
||||
}
|
||||
|
||||
// Initialize Sifive FU540 based SPI Controller
|
||||
void spi_init(uint32_t clkin) {
|
||||
// Enable interrupts
|
||||
write_reg(SPI_IE, 0x3);
|
||||
|
||||
// Set TXMARK to 1. If the number of entries is < 1
|
||||
// IP's txwm field will go high.
|
||||
// Set RXMARK to 0. If the number of entries is > 0
|
||||
// IP's rwxm field will go high.
|
||||
write_reg(SPI_TXMARK, 1);
|
||||
write_reg(SPI_RXMARK, 0);
|
||||
|
||||
// Set Delay 0 to default
|
||||
write_reg(SPI_DELAY0,
|
||||
SIFIVE_SPI_DELAY0_CSSCK(1) |
|
||||
SIFIVE_SPI_DELAY0_SCKCS(1));
|
||||
|
||||
// Set Delay 1 to default
|
||||
write_reg(SPI_DELAY1,
|
||||
SIFIVE_SPI_DELAY1_INTERCS(1) |
|
||||
SIFIVE_SPI_DELAY1_INTERXFR(0));
|
||||
|
||||
// Initialize the SPI controller clock to
|
||||
// div = (20MHz/(2*400kHz)) - 1 = 24 = 0x18
|
||||
write_reg(SPI_SCKDIV, 0x18);
|
||||
}
|
||||
|
||||
void main() {
|
||||
spi_init(100000000);
|
||||
|
||||
spi_set_clock(100000000,50000000);
|
||||
|
||||
volatile uint8_t *p = (uint8_t *)(0x8F000000);
|
||||
int j;
|
||||
uint64_t n = 0;
|
||||
|
||||
write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_HOLD);
|
||||
//n = 512/8;
|
||||
|
||||
n = 4;
|
||||
do {
|
||||
// Send 8 dummy bytes (fifo should be empty)
|
||||
for (j = 0; j < 8; j++) {
|
||||
spi_sendbyte(0xaa + j);
|
||||
}
|
||||
|
||||
// Reset counter. Process bytes AS THEY COME IN.
|
||||
for (j = 0; j < 8; j++) {
|
||||
while (!(read_reg(SPI_IP) & 2)) {}
|
||||
uint8_t x = spi_readbyte();
|
||||
*p++ = x;
|
||||
}
|
||||
} while(--n > 0);
|
||||
|
||||
write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_AUTO);
|
||||
}
|
59
tests/custom/spitest/start.s
Normal file
59
tests/custom/spitest/start.s
Normal file
@ -0,0 +1,59 @@
|
||||
.section .init
|
||||
.global _start
|
||||
.type _start, @function
|
||||
|
||||
_start:
|
||||
# Initialize global pointer
|
||||
.option push
|
||||
.option norelax
|
||||
1:auipc gp, %pcrel_hi(__global_pointer$)
|
||||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
li x1, 0
|
||||
li x2, 0
|
||||
li x4, 0
|
||||
li x5, 0
|
||||
li x6, 0
|
||||
li x7, 0
|
||||
li x8, 0
|
||||
li x9, 0
|
||||
li x10, 0
|
||||
li x11, 0
|
||||
li x12, 0
|
||||
li x13, 0
|
||||
li x14, 0
|
||||
li x15, 0
|
||||
li x16, 0
|
||||
li x17, 0
|
||||
li x18, 0
|
||||
li x19, 0
|
||||
li x20, 0
|
||||
li x21, 0
|
||||
li x22, 0
|
||||
li x23, 0
|
||||
li x24, 0
|
||||
li x25, 0
|
||||
li x26, 0
|
||||
li x27, 0
|
||||
li x28, 0
|
||||
li x29, 0
|
||||
li x30, 0
|
||||
li x31, 0
|
||||
|
||||
|
||||
|
||||
# set the stack pointer to the top of memory - 8 bytes (pointer size)
|
||||
li sp, 0x87FFFFF8
|
||||
|
||||
jal ra, main
|
||||
jal ra, _halt
|
||||
|
||||
.section .text
|
||||
.global _halt
|
||||
.type _halt, @function
|
||||
_halt:
|
||||
li gp, 1
|
||||
li a0, 0
|
||||
ecall
|
||||
j _halt
|
@ -1,5 +1,7 @@
|
||||
#!/bin/sh
|
||||
# create test vectors for stand alone int
|
||||
|
||||
mkdir IF_vectors
|
||||
./extract_testfloat_vectors.py
|
||||
./extract_arch_vectors.py
|
||||
cp IF_vectors/* ../vectors
|
||||
|
Loading…
Reference in New Issue
Block a user