diff --git a/README.md b/README.md index 39b5c6780..7e0947412 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,49 @@ +# divremsqrt +This branch contains the relevant hardware and test/synthesis flows for cvw's unified integer/fp divide/sqrt recurrence unit. The recurrence unit can be generated for a variety of configurations, which span flavors of radix = {2,4}, floating-point precision = {float,double,quad}, integer width = {unsupported,32,64} and divider copies = {1,2,4,8}. + +The fpu postprocessor on cvw handles inputs not only from the div/sqrt unit, but also the fma and convert units. This branch's drsu unit contains a postprocessor with logic only relevant to division/sqrt. + +# file hierarchy + +The RTL files for the divider can be found under `cvw/src/fpu` + +The majority of divider modules are found in `cvw/src/fpu/divremsqrt`, which also borrows some modules from `cvw/src/fpu/fdivsqrt` + +divremsqrt/drsu describes the top-level unit for the divider, taking in unpacked floating point signals, including Xs, Xm, Xe, Ys, Ym, Ye. + +drsu first feeds signals to `divremsqrt/divremsqrt`, which contains the preprocessor, iteration units, fsm, and postprocessing logic. The postprocessor in `divremsqrt/divremsqrt` also contains all integer postprocessing logic. Outputs from `divremsqrt/divremsqrt` are then sent to `divremsqrt/divremsqrtpostprocess`, which handles rounding and flags. + +# verification flow + +drsu is verified with the RISC-V arch test Berkeley SoftFloat floating point suite of test vectors for floating point square-root and division. In order to run the top-level regression script, run `regression-wally-intdiv -intdiv` + +The top-level regression python script is found accordingly in `cvw/bin/regression-wally-intdiv`. The testbench is found in `cvw/testbench/testbench_fp`, which runs drsu against testvectors. 
Batches of testvectors are stored within `cvw/testbench/tests-fp.vh`, and the raw binary test vectors are read from `tests/fp/vectors` + +Regression log files can be found in `cvw/sim/questa/logs` after running `regression-wally-intdiv -intdiv`. Files are named with `{precision}_ieee_div_{R}_{K}{integer}_rv{XLEN}gc_{TESTNAME}.log` + +* precision denotes the floating-point precision types supported by the divider: f, fd, fdq, fdqh +* R denotes the radix of the divider: 2,4 +* K denotes the number of divider copies in the unit: 1,2,4,8 +* integer denotes whether integer division/remainder is supported on the divider: i (omitted when unsupported) +* XLEN denotes the width of integers: 32, 64 (this only matters if integer is supported on the divider) +* TESTNAME denotes which tests are being run: + * fdivremsqrt: runs fdiv, fsqrt, intdiv, intrem + * fdiv: runs fdiv + * fsqrt: runs fsqrt + + + +# synthesis flow +To run synthesis results for all flavors of the recurrence unit, go to `cvw/synthDC/scripts` and run `python3 synthdrsu.py`. This will execute a python script that runs the installed version of Synopsys Design Compiler on divider permutations for target frequencies of 5 GHz and 100 MHz. To then pipe area, delay and energy results to a CSV, run `./writeCSV.sh`. Results can then be viewed in `fp-synthresults_reordered.csv` in a format similar to the one presented in the paper. +# start-up steps +1) `git clone --recurse-submodules https://github.com/openhwgroup/cvw.git` +2) `cd cvw` +3) `git checkout divremsqrt` +4) `source ./setup.sh` +5) `make` +6) `regression-wally-intdiv -intdiv` + + # core-v-wally Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, B, C, D, F, M, Q, and Zk* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO). Wally is written in SystemVerilog. 
It passes the [RISC-V Arch Tests](https://github.com/riscv-non-isa/riscv-arch-test) and boots Linux on an FPGA. Configurations range from a minimal RV32E core to a fully featured RV64GC application processor. diff --git a/bin/regression-wally b/bin/regression-wally index cd4844b08..512856602 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -371,6 +371,7 @@ args = parser.parse_args() if (args.nightly): nightMode = "--nightly"; sims = ["questa", "verilator", "vcs"] # exercise all simulators; can omit a sim if no license is available +# sims = ["questa", "verilator"] # exercise all simulators; can omit a sim if no license is available else: nightMode = "" sims = [defaultsim] @@ -512,10 +513,12 @@ def main(): elif args.fcov: TIMEOUT_DUR = 1*60 os.system('rm -f questa/fcov_ucdb/* questa/fcov_logs/* questa/fcov/*') - elif args.nightly: + elif args.buildroot: TIMEOUT_DUR = 60*1440 # 1 day elif args.testfloat: TIMEOUT_DUR = 30*60 # seconds + elif args.nightly: + TIMEOUT_DUR = 30*60 # seconds else: TIMEOUT_DUR = 10*60 # seconds diff --git a/bin/regression-wally-intdiv b/bin/regression-wally-intdiv new file mode 100755 index 000000000..5408e8661 --- /dev/null +++ b/bin/regression-wally-intdiv @@ -0,0 +1,577 @@ +#!/usr/bin/python3 +################################## +# +# regression-wally +# David_Harris@Hmc.edu 25 January 2021 +# Modified by Jarred Allen +# +# Run a regression with multiple configurations in parallel and exit with +# non-zero status code if an error happened, as well as printing human-readable +# output. 
+# +################################## +import sys,os,shutil +import multiprocessing + + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +from collections import namedtuple + +WALLY = os.environ.get('WALLY') +regressionDir = WALLY + '/sim' +os.chdir(regressionDir) + +coverage = '-coverage' in sys.argv +fp = '-fp' in sys.argv +nightly = '-nightly' in sys.argv +softfloat = '-softfloat' in sys.argv +intdiv = '-intdiv' in sys.argv + +TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) +# name: the name of this test configuration (used in printing human-readable +# output and picking logfile names) +# cmd: the command to run to test (should include the logfile as '{}', and +# the command needs to write to that file) +# grepstr: the string to grep through the log file for. The test succeeds iff +# grep finds that string in the logfile (is used by grep, so it may +# be any pattern grep accepts, see `man 1 grep` for more info). + +# edit this list to add more test cases +if (nightly): + nightMode = "-nightly"; + configs = [] +else: + nightMode = ""; + configs = [ + TestCase( + name="lints", + variant="all", + cmd="./lint-wally " + nightMode + " | tee {}", + grepstr="lints run with no errors or warnings" + ) + ] + +def getBuildrootTC(boot): + INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM + MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt. 
+ if boot: + name="buildrootboot" + BRcmd="vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < XLEN +# ["nodcache_rv32gc", ["ahb32"]], +# ["nocache_rv32gc", ["ahb32"]], + ["noicache_rv64gc", ["ahb64"]], + ["nodcache_rv64gc", ["ahb64"]], + ["nocache_rv64gc", ["ahb64"]], + + ### add misaligned tests + + ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4_rv64gc", ["arch64f_divsqrt", 
"arch64d_divsqrt", "arch64m"]], + ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + + ### branch predictor simulation + + # ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", 
"-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # # btb + # ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # # ras + # ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + +# enable floating-point tests when lint is fixed + ["f_rv32gc", ["arch32f", 
"arch32f_divsqrt", "arch32f_fma"]], + ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]], + ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]], + ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], + ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed + ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]], + + + ] + for test in derivconfigtests: + config = test[0]; + tests = test[1]; + if(len(test) >= 4 and test[2] == "configOptions"): + configOptions = test[3] + cmdPrefix = "vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < " + logname, + grepstr="All Tests completed with 0 errors" + ) + configs.insert(0,fdivremsqrttestcase) + for config in nointdivconfigs: + # div,sqrt test cases for no integer flavor of divider + + name = "div_drsu" + logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log" + divtestcase = TestCase( + name=name, + variant=config, + #cmd="vsim > {} -c < " + logname, + grepstr="All Tests completed with 0 errors" + ) + configs.insert(0,divtestcase) + + name = "sqrt_drsu" + logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config+"_"+name+".log" + 
sqrttestcase = TestCase( + name=name, + variant=config, + #cmd="vsim > {} -c < " + logname, + grepstr="All Tests completed with 0 errors" + ) + configs.insert(0,sqrttestcase) + +import os +from multiprocessing import Pool, TimeoutError + +def search_log_for_text(text, logfile): + """Search through the given log file for text, returning True if it is found or False if it is not""" + grepcmd = "grep -e '%s' '%s' > /dev/null" % (text, logfile) + return os.system(grepcmd) == 0 + +def run_test_case(config): + testfloatsim = "questa" # change to Verilator when Issue #707 about testfloat not running Verilator is resolved + """Run the given test case, and return 0 if the test suceeds and 1 if it fails""" + #sim_logdir = WALLY+ "/sim/" + sim + "/logs/" + logname = WALLY + "/sim/" + testfloatsim + "/logs/"+config.variant+"_"+config.name+".log" + #logname = "logs/"+config.variant+"_"+config.name+".log" + cmd = config.cmd.format(logname) +# print(cmd) + os.chdir(regressionDir) + os.system(cmd) + if search_log_for_text(config.grepstr, logname): + print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name)) + return 0 + else: + print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name)) + print(" Check %s" % logname) + return 1 + +def main(): + """Run the tests and count the failures""" + global configs, coverage + try: + os.chdir(regressionDir) + os.mkdir("logs") + except: + pass + try: + shutil.rmtree("wkdir") + except: + pass + finally: + os.mkdir("wkdir") + + if '-makeTests' in sys.argv: + os.chdir(regressionDir) + os.system('./make-tests.sh | tee ./logs/make-tests.log') + + if '-all' in sys.argv: + TIMEOUT_DUR = 30*7200 # seconds + configs.append(getBuildrootTC(boot=True)) + elif '-buildroot' in sys.argv: + TIMEOUT_DUR = 30*7200 # seconds + configs=[getBuildrootTC(boot=True)] + elif '-coverage' in sys.argv: + TIMEOUT_DUR = 20*60 # seconds + # Presently don't run buildroot because it has a different 
config and can't be merged with the rv64gc coverage. + # Also it is slow to run. + # configs.append(getBuildrootTC(boot=False)) + os.system('rm -f cov/*.ucdb') + elif '-nightly' in sys.argv: + TIMEOUT_DUR = 60*1440 # 1 day + configs.append(getBuildrootTC(boot=False)) + elif '-softfloat' in sys.argv: + TIMEOUT_DUR = 60*60 # seconds + elif '-intdiv' in sys.argv: + TIMEOUT_DUR = 60*60 # seconds + else: + TIMEOUT_DUR = 10*60 # seconds + configs.append(getBuildrootTC(boot=False)) + + # Scale the number of concurrent processes to the number of test cases, but + # max out at a limited number of concurrent processes to not overwhelm the system + with Pool(processes=min(len(configs),multiprocessing.cpu_count())) as pool: + num_fail = 0 + results = {} + for config in configs: + results[config] = pool.apply_async(run_test_case,(config,)) + for (config,result) in results.items(): + try: + num_fail+=result.get(timeout=TIMEOUT_DUR) + except TimeoutError: + num_fail+=1 + print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR)) + + # Coverage report + if coverage: + os.system('make coverage') + # Count the number of failures + if num_fail: + print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail) + else: + print(f"{bcolors.OKGREEN}SUCCESS! 
All tests ran without failures{bcolors.ENDC}") + return num_fail + +if __name__ == '__main__': + exit(main()) diff --git a/bin/wsim b/bin/wsim index 986957a3c..cef7eca27 100755 --- a/bin/wsim +++ b/bin/wsim @@ -28,6 +28,7 @@ parser.add_argument("--tb", "-t", help="Testbench", choices=["testbench", "testb parser.add_argument("--gui", "-g", help="Simulate with GUI", action="store_true") parser.add_argument("--ccov", "-c", help="Code Coverage", action="store_true") parser.add_argument("--fcov", "-f", help="Functional Coverage, implies lockstep", action="store_true") +parser.add_argument("--fcov2", "-f2", help="Functional Coverage, implies lockstep", action="store_true") parser.add_argument("--fcovrvvi", "-fr", help="Functional Coverage RVVI", action="store_true") parser.add_argument("--args", "-a", help="Optional arguments passed to simulator via $value$plusargs", default="") parser.add_argument("--vcd", "-v", help="Generate testbench.vcd", action="store_true") @@ -66,7 +67,7 @@ if(args.testsuite.endswith('.elf') and args.elf == ""): # No --elf argument; che # Validate arguments -if (args.gui or args.ccov or args.fcov or args.fcovrvvi or args.lockstep): +if (args.gui or args.ccov or args.fcov or args.fcov2 or args.fcovrvvi or args.lockstep): if args.sim not in ["questa", "vcs"]: print("Option only supported for Questa and VCS") exit(1) @@ -81,7 +82,7 @@ if (args.rvvi): if(int(args.locksteplog) >= 1): EnableLog = 1 else: EnableLog = 0 if (args.lockstep): - prefix = "IMPERAS_TOOLS=" + WALLY + "/sim/imperas.ic" + prefix = "IMPERAS_TOOLS=" + WALLY + "/config/"+args.config+"/imperas.ic" if(args.locksteplog != 0): ImperasPlusArgs = " +IDV_TRACE2LOG=" + str(EnableLog) + " +IDV_TRACE2LOG_AFTER=" + str(args.locksteplog) else: ImperasPlusArgs = "" if(args.fcov): @@ -90,6 +91,12 @@ if (args.lockstep): else: EnableLog = 0 ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr; suffix = "" + 
if(args.fcov2): + CovEnableStr = "1" if int(args.covlog) > 0 else "0"; + if(args.covlog >= 1): EnableLog = 1 + else: EnableLog = 0 + ImperasPlusArgs = " +IDV_TRACE2COV=" + str(EnableLog) + " +TRACE2LOG_AFTER=" + str(args.covlog) + " +TRACE2COV_ENABLE=" + CovEnableStr; + suffix = "" else: CovEnableStr = "" suffix = "--lockstep" @@ -104,6 +111,8 @@ if (args.ccov): flags += " --ccov" if (args.fcov): flags += " --fcov" +if (args.fcov2): + flags += " --fcov2" if (args.fcovrvvi): flags += "--fcovrvvi" diff --git a/config/derivlist.txt b/config/derivlist.txt index ab9ee703f..cac0c8c02 100644 --- a/config/derivlist.txt +++ b/config/derivlist.txt @@ -950,6 +950,9 @@ D_SUPPORTED 0 ZCD_SUPPORTED 0 ZFH_SUPPORTED 0 +deriv f_div_2_8_rv64gc f_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv f_div_4_1_rv64gc div_4_1_rv64gc D_SUPPORTED 0 ZCD_SUPPORTED 0 @@ -982,6 +985,9 @@ D_SUPPORTED 0 ZCD_SUPPORTED 0 ZFH_SUPPORTED 1 +deriv fh_div_2_8_rv32gc fh_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fh_div_4_1_rv32gc div_4_1_rv32gc D_SUPPORTED 0 ZCD_SUPPORTED 0 @@ -1012,6 +1018,9 @@ D_SUPPORTED 0 ZCD_SUPPORTED 0 ZFH_SUPPORTED 1 +deriv fh_div_2_8_rv64gc fh_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fh_div_4_1_rv64gc div_4_1_rv64gc D_SUPPORTED 0 ZCD_SUPPORTED 0 @@ -1038,6 +1047,9 @@ ZFH_SUPPORTED 0 deriv fd_div_2_4_rv32gc div_2_4_rv32gc ZFH_SUPPORTED 0 +deriv fd_div_2_8_rv32gc fd_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fd_div_4_1_rv32gc div_4_1_rv32gc ZFH_SUPPORTED 0 @@ -1056,6 +1068,9 @@ ZFH_SUPPORTED 0 deriv fd_div_2_4_rv64gc div_2_4_rv64gc ZFH_SUPPORTED 0 +deriv fd_div_2_8_rv64gc fd_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fd_div_4_1_rv64gc div_4_1_rv64gc ZFH_SUPPORTED 0 @@ -1077,6 +1092,9 @@ ZFH_SUPPORTED 1 deriv fdh_div_2_4_rv32gc div_2_4_rv32gc ZFH_SUPPORTED 1 +deriv fdh_div_2_8_rv32gc fdh_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdh_div_4_1_rv32gc div_4_1_rv32gc ZFH_SUPPORTED 1 @@ -1095,6 +1113,9 @@ ZFH_SUPPORTED 1 deriv fdh_div_2_4_rv64gc div_2_4_rv64gc ZFH_SUPPORTED 1 +deriv fdh_div_2_8_rv64gc 
fdh_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdh_div_4_1_rv64gc div_4_1_rv64gc ZFH_SUPPORTED 1 @@ -1118,6 +1139,9 @@ deriv fdq_div_2_4_rv32gc div_2_4_rv32gc Q_SUPPORTED 1 ZFH_SUPPORTED 0 +deriv fdq_div_2_8_rv32gc fdq_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdq_div_4_1_rv32gc div_4_1_rv32gc Q_SUPPORTED 1 ZFH_SUPPORTED 0 @@ -1142,6 +1166,9 @@ deriv fdq_div_2_4_rv64gc div_2_4_rv64gc Q_SUPPORTED 1 ZFH_SUPPORTED 0 +deriv fdq_div_2_8_rv64gc fdq_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdq_div_4_1_rv64gc div_4_1_rv64gc Q_SUPPORTED 1 ZFH_SUPPORTED 0 @@ -1168,6 +1195,9 @@ deriv fdqh_div_2_4_rv32gc div_2_4_rv32gc Q_SUPPORTED 1 ZFH_SUPPORTED 1 +deriv fdqh_div_2_8_rv32gc fdqh_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdqh_div_4_1_rv32gc div_4_1_rv32gc Q_SUPPORTED 1 ZFH_SUPPORTED 1 @@ -1192,6 +1222,9 @@ deriv fdqh_div_2_4_rv64gc div_2_4_rv64gc Q_SUPPORTED 1 ZFH_SUPPORTED 1 +deriv fdqh_div_2_8_rv64gc fdqh_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdqh_div_4_1_rv64gc div_4_1_rv64gc Q_SUPPORTED 1 ZFH_SUPPORTED 1 @@ -1215,6 +1248,9 @@ IEEE754 1 deriv f_ieee_div_2_4_rv32gc f_div_2_4_rv32gc IEEE754 1 +deriv f_ieee_div_2_8_rv32gc f_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv f_ieee_div_4_1_rv32gc f_div_4_1_rv32gc IEEE754 1 @@ -1233,6 +1269,9 @@ IEEE754 1 deriv f_ieee_div_2_4_rv64gc f_div_2_4_rv64gc IEEE754 1 +deriv f_ieee_div_2_8_rv64gc f_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv f_ieee_div_4_1_rv64gc f_div_4_1_rv64gc IEEE754 1 @@ -1252,6 +1291,9 @@ IEEE754 1 deriv fh_ieee_div_2_4_rv32gc fh_div_2_4_rv32gc IEEE754 1 +deriv fh_ieee_div_2_8_rv32gc fh_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fh_ieee_div_4_1_rv32gc fh_div_4_1_rv32gc IEEE754 1 @@ -1270,6 +1312,9 @@ IEEE754 1 deriv fh_ieee_div_2_4_rv64gc fh_div_2_4_rv64gc IEEE754 1 +deriv fh_ieee_div_2_8_rv64gc fh_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fh_ieee_div_4_1_rv64gc fh_div_4_1_rv64gc IEEE754 1 @@ -1289,6 +1334,9 @@ IEEE754 1 deriv fd_ieee_div_2_4_rv32gc fd_div_2_4_rv32gc IEEE754 1 +deriv fd_ieee_div_2_8_rv32gc 
fd_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fd_ieee_div_4_1_rv32gc fd_div_4_1_rv32gc IEEE754 1 @@ -1307,6 +1355,9 @@ IEEE754 1 deriv fd_ieee_div_2_4_rv64gc fd_div_2_4_rv64gc IEEE754 1 +deriv fd_ieee_div_2_8_rv64gc fd_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fd_ieee_div_4_1_rv64gc fd_div_4_1_rv64gc IEEE754 1 @@ -1327,6 +1378,9 @@ IEEE754 1 deriv fdh_ieee_div_2_4_rv32gc fdh_div_2_4_rv32gc IEEE754 1 +deriv fdh_ieee_div_2_8_rv32gc fdh_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdh_ieee_div_4_1_rv32gc fdh_div_4_1_rv32gc IEEE754 1 @@ -1345,6 +1399,9 @@ IEEE754 1 deriv fdh_ieee_div_2_4_rv64gc fdh_div_2_4_rv64gc IEEE754 1 +deriv fdh_ieee_div_2_8_rv64gc fdh_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdh_ieee_div_4_1_rv64gc fdh_div_4_1_rv64gc IEEE754 1 @@ -1364,6 +1421,9 @@ IEEE754 1 deriv fdq_ieee_div_2_4_rv32gc fdq_div_2_4_rv32gc IEEE754 1 +deriv fdq_ieee_div_2_8_rv32gc fdq_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdq_ieee_div_4_1_rv32gc fdq_div_4_1_rv32gc IEEE754 1 @@ -1382,6 +1442,9 @@ IEEE754 1 deriv fdq_ieee_div_2_4_rv64gc fdq_div_2_4_rv64gc IEEE754 1 +deriv fdq_ieee_div_2_8_rv64gc fdq_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdq_ieee_div_4_1_rv64gc fdq_div_4_1_rv64gc IEEE754 1 @@ -1402,6 +1465,9 @@ IEEE754 1 deriv fdqh_ieee_div_2_4_rv32gc fdqh_div_2_4_rv32gc IEEE754 1 +deriv fdqh_ieee_div_2_8_rv32gc fdqh_ieee_div_2_4_rv32gc +DIVCOPIES 32'd8 + deriv fdqh_ieee_div_4_1_rv32gc fdqh_div_4_1_rv32gc IEEE754 1 @@ -1420,6 +1486,9 @@ IEEE754 1 deriv fdqh_ieee_div_2_4_rv64gc fdqh_div_2_4_rv64gc IEEE754 1 +deriv fdqh_ieee_div_2_8_rv64gc fdqh_ieee_div_2_4_rv64gc +DIVCOPIES 32'd8 + deriv fdqh_ieee_div_4_1_rv64gc fdqh_div_4_1_rv64gc IEEE754 1 @@ -1440,6 +1509,9 @@ IDIV_ON_FPU 1 deriv f_ieee_div_2_4i_rv32gc f_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv f_ieee_div_2_8i_rv32gc f_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv f_ieee_div_4_1i_rv32gc f_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1458,6 +1530,9 @@ IDIV_ON_FPU 1 deriv f_ieee_div_2_4i_rv64gc f_ieee_div_2_4_rv64gc 
IDIV_ON_FPU 1 +deriv f_ieee_div_2_8i_rv64gc f_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv f_ieee_div_4_1i_rv64gc f_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 @@ -1477,6 +1552,9 @@ IDIV_ON_FPU 1 deriv fh_ieee_div_2_4i_rv32gc fh_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv fh_ieee_div_2_8i_rv32gc fh_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv fh_ieee_div_4_1i_rv32gc fh_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1495,6 +1573,9 @@ IDIV_ON_FPU 1 deriv fh_ieee_div_2_4i_rv64gc fh_ieee_div_2_4_rv64gc IDIV_ON_FPU 1 +deriv fh_ieee_div_2_8i_rv64gc fh_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv fh_ieee_div_4_1i_rv64gc fh_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 @@ -1515,6 +1596,9 @@ IDIV_ON_FPU 1 deriv fd_ieee_div_2_4i_rv32gc fd_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv fd_ieee_div_2_8i_rv32gc fd_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv fd_ieee_div_4_1i_rv32gc fd_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1533,6 +1617,9 @@ IDIV_ON_FPU 1 deriv fd_ieee_div_2_4i_rv64gc fd_ieee_div_2_4_rv64gc IDIV_ON_FPU 1 +deriv fd_ieee_div_2_8i_rv64gc fd_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv fd_ieee_div_4_1i_rv64gc fd_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 @@ -1553,6 +1640,9 @@ IDIV_ON_FPU 1 deriv fdh_ieee_div_2_4i_rv32gc fdh_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv fdh_ieee_div_2_8i_rv32gc fdh_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv fdh_ieee_div_4_1i_rv32gc fdh_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1571,6 +1661,9 @@ IDIV_ON_FPU 1 deriv fdh_ieee_div_2_4i_rv64gc fdh_ieee_div_2_4_rv64gc IDIV_ON_FPU 1 +deriv fdh_ieee_div_2_8i_rv64gc fdh_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv fdh_ieee_div_4_1i_rv64gc fdh_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 @@ -1591,6 +1684,9 @@ IDIV_ON_FPU 1 deriv fdq_ieee_div_2_4i_rv32gc fdq_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv fdq_ieee_div_2_8i_rv32gc fdq_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv fdq_ieee_div_4_1i_rv32gc fdq_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1609,6 +1705,9 @@ IDIV_ON_FPU 1 deriv fdq_ieee_div_2_4i_rv64gc fdq_ieee_div_2_4_rv64gc IDIV_ON_FPU 1 +deriv 
fdq_ieee_div_2_8i_rv64gc fdq_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv fdq_ieee_div_4_1i_rv64gc fdq_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 @@ -1629,6 +1728,9 @@ IDIV_ON_FPU 1 deriv fdqh_ieee_div_2_4i_rv32gc fdqh_ieee_div_2_4_rv32gc IDIV_ON_FPU 1 +deriv fdqh_ieee_div_2_8i_rv32gc fdqh_ieee_div_2_4i_rv32gc +DIVCOPIES 32'd8 + deriv fdqh_ieee_div_4_1i_rv32gc fdqh_ieee_div_4_1_rv32gc IDIV_ON_FPU 1 @@ -1647,6 +1749,9 @@ IDIV_ON_FPU 1 deriv fdqh_ieee_div_2_4i_rv64gc fdqh_ieee_div_2_4_rv64gc IDIV_ON_FPU 1 +deriv fdqh_ieee_div_2_8i_rv64gc fdqh_ieee_div_2_4i_rv64gc +DIVCOPIES 32'd8 + deriv fdqh_ieee_div_4_1i_rv64gc fdqh_ieee_div_4_1_rv64gc IDIV_ON_FPU 1 diff --git a/sim/imperas.ic b/config/rv32gc/imperas.ic similarity index 94% rename from sim/imperas.ic rename to config/rv32gc/imperas.ic index aee25eabf..c9b8292f2 100644 --- a/sim/imperas.ic +++ b/config/rv32gc/imperas.ic @@ -9,6 +9,7 @@ #--showcommands # Core settings +--variant RV32GC # for RV32GC --override cpu/priv_version=1.12 --override cpu/user_version=20191213 # arch @@ -38,11 +39,12 @@ --override lr_sc_grain=8 # Za64rs requires <=64; we use native word size # 64 KiB continuous huge pages supported ---override cpu/Svpbmt=T ---override cpu/Svnapot_page_mask=65536 +#--override cpu/Svpbmt=F +#--override cpu/Svnapot_page_mask=65536 -# SV39 and SV48 supported ---override cpu/Sv_modes=768 +# SV32 supported +--override cpu/Sv_modes=3 +#--showoverrides --override cpu/Svinval=T @@ -59,7 +61,7 @@ --override cpu/reset_address=0x80000000 ---override cpu/unaligned=T # Zicclsm (should be true) +--override cpu/unaligned=F # Zicclsm (should be true) --override cpu/ignore_non_leaf_DAU=1 --override cpu/wfi_is_nop=T --override cpu/misa_Extensions_mask=0x0 # MISA not writable @@ -74,7 +76,7 @@ --override cpu/PMP_undefined=T # mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception ---override cpu/mstatus_fs_mode=rvfs_write_nz +--override cpu/mstatus_fs_mode=write_1 # PMA Settings # 'r': read access 
allowed diff --git a/config/rv64gc/imperas.ic b/config/rv64gc/imperas.ic new file mode 100644 index 000000000..fa9b56dfc --- /dev/null +++ b/config/rv64gc/imperas.ic @@ -0,0 +1,117 @@ +# imperas.ic +# Initialization file for ImperasDV lock step simulation +# David_Harris@hmc.edu 15 August 2024 +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +#--mpdconsole +#--gdbconsole +#--showoverrides +#--showcommands + +# Core settings +--override cpu/priv_version=1.12 +--override cpu/user_version=20191213 +# arch +--override cpu/mimpid=0x100 +--override cpu/mvendorid=0x602 +--override cpu/marchid=0x24 +--override refRoot/cpu/tvec_align=64 +--override refRoot/cpu/envcfg_mask=1 # dh 1/26/24 this should be deleted when ImperasDV is updated to allow envcfg.FIOM to be written + +# bit manipulation +--override cpu/add_Extensions=B +--override cpu/bitmanip_version=1.0.0 +--override cpu/misa_B_Zba_Zbb_Zbs=T + +# More extensions +--override cpu/Zcb=T +--override cpu/Zicond=T +--override cpu/Zfh=T +--override cpu/Zfa=T + +# Cache block operations +--override cpu/Zicbom=T +--override cpu/Zicbop=T +--override cpu/Zicboz=T +--override cmomp_bytes=64 # Zic64b +--override cmoz_bytes=64 # Zic64b +--override lr_sc_grain=8 # Za64rs requires <=64; we use native word size + +# 64 KiB continuous huge pages supported +--override cpu/Svpbmt=T +--override cpu/Svnapot_page_mask=65536 + +# SV39 and SV48 supported +--override cpu/Sv_modes=768 + +--override cpu/Svinval=T + + +# clarify +#--override refRoot/cpu/mtvec_sext=F + +--override cpu/tval_ii_code=T + +#--override cpu/time_undefined=T +#--override cpu/cycle_undefined=T +#--override cpu/instret_undefined=T +#--override cpu/hpmcounter_undefined=T + +--override cpu/reset_address=0x80000000 + +--override cpu/unaligned=T # Zicclsm (should be true) +--override cpu/ignore_non_leaf_DAU=1 +--override cpu/wfi_is_nop=T +--override cpu/misa_Extensions_mask=0x0 # MISA not writable +--override cpu/Sstc=T + +# Enable SVADU hardware update of A/D bits when 
menvcfg.ADUE=1 +--override cpu/Svadu=T +#--override cpu/updatePTEA=F +#--override cpu/updatePTED=F + +--override cpu/PMP_registers=16 +--override cpu/PMP_undefined=T + +# mstatus.FS is set dirty on any write to a FPR, or when a fp operation signals an exception +--override cpu/mstatus_fs_mode=write_1 + +# PMA Settings +# 'r': read access allowed +# 'w': write access allowed +# 'x': execute access allowed +# 'a': aligned access required +# 'A': atomic instructions NOT allowed (actually USER1 privilege needed) +# 'P': push/pop instructions NOT allowed (actually USER2 privilege needed) +# '1': 1-byte accesses allowed +# '2': 2-byte accesses allowed +# '4': 4-byte accesses allowed +# '8': 8-byte accesses allowed +# '-', space: ignored (use for input string formatting). +# +# SVxx Memory 0x0000000000 0x7FFFFFFFFF +# +--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0xFFFFFFFFFFFFFFFFFF -attributes " ---a-- ---- " # All memory inaccessible unless defined otherwise +--callcommand refRoot/cpu/setPMA -lo 0x0000000000 -hi 0x7FFFFFFFFF -attributes " ---a-- ---- " # INITIAL +--callcommand refRoot/cpu/setPMA -lo 0x0000001000 -hi 0x0000001FFF -attributes " r-x-A- 1248 " # BOOTROM +--callcommand refRoot/cpu/setPMA -lo 0x0000012100 -hi 0x000001211F -attributes " rw-aA- --48 " # SDC +--callcommand refRoot/cpu/setPMA -lo 0x0002000000 -hi 0x000200FFFF -attributes " rw-aA- 1248 " # CLINT +--callcommand refRoot/cpu/setPMA -lo 0x000C000000 -hi 0x000FFFFFFF -attributes " rw-aA- --4- " # PLIC +--callcommand refRoot/cpu/setPMA -lo 0x0010000000 -hi 0x0010000007 -attributes " rw-aA- 1--- " # UART0 error - 0x10000000 - 0x100000FF +--callcommand refRoot/cpu/setPMA -lo 0x0010060000 -hi 0x00100600FF -attributes " rw-aA- --4- " # GPIO error - 0x10069000 - 0x100600FF +--callcommand refRoot/cpu/setPMA -lo 0x0010040000 -hi 0x0010040FFF -attributes " rw-aA- --4- " # SPI error - 0x10040000 - 0x10040FFF +--callcommand refRoot/cpu/setPMA -lo 0x0080000000 -hi 0x008FFFFFFF -attributes " rwx--- 1248 " 
# UNCORE_RAM + +# Enable the Imperas instruction coverage +#-extlib refRoot/cpu/cv=imperas.com/intercept/riscvInstructionCoverage/1.0 +#-override refRoot/cpu/cv/cover=basic +#-override refRoot/cpu/cv/extensions=RV32I + +# Add Imperas simulator application instruction tracing +# uncomment these to provide tracing +#--verbose --trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange # --traceafter 300000000 +#--override cpu/debugflags=6 --override cpu/verbose=1 +#--override cpu/show_c_prefix=T + +# Store simulator output to logfile +--output imperas.log diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 91e1d4100..445dc392f 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) +localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF)); +localparam NORMSHIFTSZDRSU = DIVb+1+NF; +localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU); + // Disable spurious Verilator warnings /* verilator lint_off STMTDLY */ diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index c80b00232..bb036c94d 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -194,6 +194,8 @@ localparam cvw_t P = '{ FMALEN : FMALEN, NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, + NORMSHIFTSZDRSU : NORMSHIFTSZDRSU, + LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU, LOGR : LOGR, RK : RK, FPDUR : FPDUR, diff --git a/fpga/zsbl/boot.c b/fpga/zsbl/boot.c index b21c49f48..d9a824a49 100644 --- a/fpga/zsbl/boot.c +++ b/fpga/zsbl/boot.c @@ -52,6 +52,42 @@ when 8 bytes are transferred */ +// crc16 table to reduce byte processing time +static const uint16_t crctable[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, + 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 
0xe1ce, 0xf1ef, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, + 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, + 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, + 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, + 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, + 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, + 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, + 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, + 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, + 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, + 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, + 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, + 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, + 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, + 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, + 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, + 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, + 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, + 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, + 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, + 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, + 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0 +}; 
+ int disk_read(BYTE * buf, LBA_t sector, UINT count) { uint64_t r; UINT i, j; @@ -86,6 +122,7 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) { for (i = 0; i < count; i++) { uint16_t crc, crc_exp; uint64_t n = 0; + uint64_t readCount = 0; // Wait for data token while((r = spi_dummy()) != SD_DATA_TOKEN); @@ -98,21 +135,45 @@ int disk_read(BYTE * buf, LBA_t sector, UINT count) { /* crc = crc16(crc, x); */ /* } while (--n > 0); */ - n = 512/8; - do { - // Send 8 dummy bytes (fifo should be empty) - for (j = 0; j < 8; j++) { + /* n = 512/8; */ + /* do { */ + /* // Send 8 dummy bytes (fifo should be empty) */ + /* for (j = 0; j < 8; j++) { */ + /* spi_sendbyte(0xff); */ + /* } */ + + /* // Reset counter. Process bytes AS THEY COME IN. */ + /* for (j = 0; j < 8; j++) { */ + /* while (!(read_reg(SPI_IP) & 2)) {} */ + /* uint8_t x = spi_readbyte(); */ + /* *p++ = x; */ + /* // crc = crc16(crc, x); */ + /* crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff; */ + /* } */ + /* } while(--n > 0); */ + + n = 512; + // Initially fill the transmit fifo + for (j = 0; j < 8; j++) { + spi_sendbyte(0xff); + } + + + while (n > 0) { + // Wait for bytes to be received + while (!(read_reg(SPI_IP) & 2)) {} + // Read byte + uint8_t x = spi_readbyte(); + // Send another dummy byte + if (n > 8) { spi_sendbyte(0xff); } - - // Reset counter. Process bytes AS THEY COME IN. 
- for (j = 0; j < 8; j++) { - while (!(read_reg(SPI_IP) & 2)) {} - uint8_t x = spi_readbyte(); - *p++ = x; - crc = crc16(crc, x); - } - } while(--n > 0); + // Place received byte into memory + *p++ = x; + // Update CRC16 with fast table based method + crc = ((crc << 8) ^ crctable[x ^ (crc >> 8)]) & 0xffff; + n = n - 1; + } // Read CRC16 and check crc_exp = ((uint16_t)spi_dummy() << 8); diff --git a/fpga/zsbl/spi.h b/fpga/zsbl/spi.h index d2bf1191c..f9e88fa6d 100644 --- a/fpga/zsbl/spi.h +++ b/fpga/zsbl/spi.h @@ -1,3 +1,32 @@ +/////////////////////////////////////////////////////////////////////// +// spi.h +// +// Written: Jaocb Pease jacob.pease@okstate.edu 7/22/2024 +// +// Purpose: Header file for interfaceing with the SPI peripheral +// +// +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the +// “License”); you may not use this file except in compliance with the +// License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+/////////////////////////////////////////////////////////////////////// + #pragma once #ifndef SPI_HEADER #define SPI_HEADER diff --git a/sim/questa/wally.do b/sim/questa/wally.do index 0f67cee37..80a7bef76 100644 --- a/sim/questa/wally.do +++ b/sim/questa/wally.do @@ -148,11 +148,33 @@ if {$FunctCoverageIndex >= 0} { set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1" set lst [lreplace $lst $FunctCoverageIndex $FunctCoverageIndex] }\ + +set FunctCoverageIndex2 [lsearch -exact $lst "--fcov2"] +if {$FunctCoverageIndex2 >= 0} { + set FunctCoverage 1 + set riscvISACOVsrc +incdir+$env(IMPERAS_HOME)/ImpProprietary/source/host/riscvISACOV/source + + set FCdefineINCLUDE_TRACE2COV "+define+INCLUDE_TRACE2COV" + set FCdefineCOVER_BASE_RV64I "+define+COVER_BASE_RV64I" + set FCdefineCOVER_LEVEL_DV_PR_EXT "+define+COVER_LEVEL_DV_PR_EXT" + # Uncomment various cover statements below to control which extensions get functional coverage + set FCdefineCOVER_RV64I "+define+COVER_RV64I" + #set FCdefineCOVER_RV64M "+define+COVER_RV64M" + #set FCdefineCOVER_RV64A "+define+COVER_RV64A" + #set FCdefineCOVER_RV64F "+define+COVER_RV64F" + #set FCdefineCOVER_RV64D "+define+COVER_RV64D" + #set FCdefineCOVER_RV64ZICSR "+define+COVER_RV64ZICSR" + #set FCdefineCOVER_RV64C "+define+COVER_RV64C" + set FCdefineIDV_INCLUDE_TRACE2COV "+define+IDV_INCLUDE_TRACE2COV" + set FCTRACE2COV "+TRACE2COV_ENABLE=1" + set FCdefineIDV_TRACE2COV "+IDV_TRACE2COV=1" + set lst [lreplace $lst $FunctCoverageIndex2 $FunctCoverageIndex2] +}\ set LockStepIndex [lsearch -exact $lst "--lockstep"] # ugh. can't have more than 9 arguments passed to vsim. why? I'll have to remove --lockstep when running # functional coverage and imply it. 
-if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0} { +if {$LockStepIndex >= 0 || $FunctCoverageIndex >= 0 || $FunctCoverageIndex2 >= 0} { set lockstep 1 # ideally this would all be one or two variables, but questa is having a real hard time diff --git a/site-setup.sh b/site-setup.sh index e2affd031..de10a758e 100755 --- a/site-setup.sh +++ b/site-setup.sh @@ -11,6 +11,7 @@ # Must edit these based on your local environment. export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server for Questa export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server +export IMPERASD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Imperas license server export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin export DC_HOME=/cad/synopsys/SYN # Change this for your path to Synopsys Design Compiler, excluding bin export VCS_HOME=/cad/synopsys/vcs/U-2023.03-SP2-4 # Change this for your path to Synopsys VCS, excluding bin diff --git a/src/cvw.sv b/src/cvw.sv index ed0493484..94006274b 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -285,6 +285,8 @@ typedef struct packed { int LOGCVTLEN; int NORMSHIFTSZ; int LOGNORMSHIFTSZ; + int NORMSHIFTSZDRSU; + int LOGNORMSHIFTSZDRSU; int FMALEN; // division constants diff --git a/src/fpu/divremsqrt/arithrightshift.sv b/src/fpu/divremsqrt/arithrightshift.sv new file mode 100644 index 000000000..624a54751 --- /dev/null +++ b/src/fpu/divremsqrt/arithrightshift.sv @@ -0,0 +1,9 @@ + +module arithrightshift import cvw::*; #(parameter cvw_t P) ( + input logic signed [P.INTDIVb+3:0] shiftin, + output logic signed [P.INTDIVb+3:0] shifted +); + assign shifted = $signed(shiftin) >>> P.LOGR; + +endmodule + diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv new file mode 100644 index 000000000..c21267070 --- /dev/null +++ b/src/fpu/divremsqrt/divremsqrt.sv @@ -0,0 +1,110 @@ 
+/////////////////////////////////////////// +// divremsqrt.sv +// +// Written: kekim@hmc.edu +// Modified:19 May 2023 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrt import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 reset,
+  input  logic [P.FMTBITS-1:0] FmtE,                           // floating-point format select
+  input  logic                 XsE,                            // X sign (unpacked operand), used by the FSM
+  input  logic [P.NF:0]        XmE, YmE,                       // floating-point significands
+  input  logic [P.NE-1:0]      XeE, YeE,                       // floating-point exponents
+  input  logic                 XInfE, YInfE,                   // special-value flags, consumed by the FSM
+  input  logic                 XZeroE, YZeroE,                 //   (XZeroE also feeds the preprocessor)
+  input  logic                 XNaNE, YNaNE,
+  input  logic                 FDivStartE, IDivStartE,         // floating-point / integer start requests to the FSM
+  input  logic                 StallM,
+  input  logic                 FlushE,
+  input  logic                 SqrtE, SqrtM,                   // square-root select, per pipeline stage
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]           Funct3E, Funct3M,               // Funct3M[1] is the postprocessor's remainder select (RemOpM)
+  input  logic                 IntDivE, W64E,                  // integer operation select and its W64 form
+  output logic                 DivStickyM,                     // sticky bit from the postprocessor
+  output logic                 FDivBusyE, IFDivStartE, FDivDoneE, // FSM status
+  output logic [P.NE+1:0]      UeM,                            // result exponent from the preprocessor
+  output logic [P.DIVb:0]      UmM,                            // result significand from the postprocessor
+  output logic [P.XLEN-1:0]    FIntDivResultM,                 // integer result from the postprocessor
+  output logic                 IntDivM,                        // integer-operation flag from the preprocessor
+  // integer normalization shifter signals
+  output logic [P.INTDIVb+3:0] PreResultM,
+  input  logic [P.XLEN-1:0]    PreIntResultM,
+  output logic [P.DIVBLEN-1:0] IntNormShiftM
+
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+  // Structure: preprocessor -> FSM-sequenced CSA iterator -> postprocessor (instantiated below)
+  // Only nets that are actually connected to a submodule are declared here.
+
+  logic [P.DIVb+3:0]  WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]  X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]  D;                            // Iterator Divisor
+  logic [P.DIVb:0]    FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]  FirstC;                       // Step tracker
+  logic               WZeroE;                       // Early termination flag
+  logic [P.DURLEN:0]  CyclesE;                      // FSM cycles
+  logic               SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+
+  // Integer div/rem signals produced by the preprocessor and consumed by the postprocessor
+  logic               BZeroM;                       // Denominator is zero
+  logic               ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM; // Special handling for postprocessor
+  logic [P.XLEN-1:0]  AM;                           // Original Numerator for postprocessor
+  logic               ISpecialCaseE;                // Integer div/remainder special cases
+
+
+  divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc(                  // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
+    // Int-specific
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .AM,
+    .IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);
+
+  fdivsqrtfsm #(P) fdivsqrtfsm(                                              // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter #(P) fdivsqrtiter(                                            // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
+    .FirstU, .FirstUM, .FirstC, .FirstWS(WS), .FirstWC(WC));
+
+  divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc(                          // Postprocessor
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
+    .SqrtE, .SqrtM, .SpecialCaseM,
+    .UmM, .WZeroE, .DivStickyM,
+    // Int-specific
+    .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
+    .FIntDivResultM, .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);
+
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
new file mode 100644
index 000000000..640735bef
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
@@ -0,0 +1,73 @@
+///////////////////////////////////////////
+// divshiftcalc.sv
+//
+// Written: me@KatherineParry.com
+//
Modified: 7/5/2022
+//
+// Purpose: Division shift calculation
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtdivshiftcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.NF+2:0]                 DivUm,              // divsqrt significand
+  input  logic [P.NE+1:0]                 DivUe,              // divsqrt exponent
+  output logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt,        // divsqrt shift amount
+  output logic [P.NORMSHIFTSZDRSU-1:0]    DivShiftIn,         // divsqrt shift input
+  output logic                            DivResSubnorm,      // is the divsqrt result subnormal
+  output logic                            DivSubnormShiftPos  // is the subnormal shift amount positive
+);
+
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] NormShift;           // normalized result shift amount
+  logic [P.LOGNORMSHIFTSZDRSU-1:0] DivSubnormShiftAmt;  // subnormal result shift amount (killed if negative)
+  logic [P.NE+1:0]                 DivSubnormShift;     // subnormal result shift amount
+
+  // is the result subnormal
+  // the result is subnormal when the exponent is negative (top bit DivUe[NE+1] set) or exactly zero
+  assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
+
+  // if the result is subnormal
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx...     >> NF+1         Exp = DivUe+NF+1
+  //  .00xxxxxxxxxxxxx...     << DivUe+NF+1   Exp = +1
+  //  .0000xxxxxxxxxxx...     >> 1            Exp = 1
+  //  Left shift amount = DivUe+NF+1-1 = NF + DivUe
+  assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
+  assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; // top bit clear means the shift amount is not negative
+
+  // if the result is normalized
+  //  00000000x.xxxxxx...                     Exp = DivUe
+  //  .00000000xxxxxxx...     >> NF+1         Exp = DivUe+NF+1
+  //  00000000.xxxxxxx...     << NF           Exp = DivUe+1
+  //  00000000x.xxxxxx...     << NF           Exp = DivUe (extra shift done afterwards)
+  //  00000000xx.xxxxx...     << 1?           Exp = DivUe-1 (determined after)
+  //  initial left shift amount = NF
+  //  shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit. NOTE(review): NormShift below is NF with no radix-dependent term; confirm this line is still current
+  assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF);
+
+  // if the shift amount is negative then don't shift (keep sticky bit)
+  // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES). NOTE(review): no such scaling appears in this module; confirm this comment is still current
+  assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0;
+  assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
+
+  // pre-shift the divider result for normalization: NF zeros on the left, DivUm, zero fill on the right up to NORMSHIFTSZDRSU bits
+  assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}};
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtearlyterm.sv b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
new file mode 100644
index 000000000..464dfdafa
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtearlyterm.sv
@@ -0,0 +1,27 @@
+module divremsqrtearlyterm import cvw::*; #(parameter cvw_t P) ( // produces the early-termination flag WZeroE
+  input  logic [P.DIVb+3:0] WS, WC,     // Q4.DIVb
+  input  logic [P.DIVb+3:0] D,          // Q4.DIVb
+  input  logic [P.DIVb:0]   FirstUM,    // U1.DIVb
+  input  logic [P.DIVb+1:0] FirstC,     // Q2.DIVb
+  input  logic              SqrtE,      // selects the square-root form of F in the radix-2 check
+  output logic              WZeroE      // weq0E, ORed with wfeq0E when P.RADIX == 2
+);
+  logic weq0E; // output of the WS, WC check (aplusbeq0 presumably flags a + b == 0; confirm against that module)
+  aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
+  if (P.RADIX == 2) begin: R2EarlyTerm // radix 2 additionally checks WS + WC + F
+    logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE; // selected F, and its square-root and divide forms
+    logic [P.DIVb+2:0] FirstK; // {1'b1, FirstC} with every bit that has a 1 immediately below it cleared
+    logic wfeq0E; // output of the WCF, WSF check
+    logic [P.DIVb+3:0] WCF, WSF; // carry-save form of WS + WC + F
+
+    assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
+    assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
+    assign FZeroDivE = D << 1; // F for divide
+    mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
+    csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
+    aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
+    assign WZeroE = weq0E|wfeq0E; // either check asserts the flag
+  end else begin
+    assign WZeroE = weq0E; // other radices use only the WS, WC check
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
new file mode 100644
index 000000000..9ca4ef503
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtcycles.sv
@@ -0,0 +1,83 @@
+///////////////////////////////////////////
+// fdivsqrtcycles.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
+// Modified: 18 April 2022
+//
+// Purpose: Determine number of cycles for divsqrt
+//
+// Documentation: RISC-V System on Chip Design Chapter
13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtcycles import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] FmtE,            // floating-point format select
+  input  logic                 SqrtE,           // not referenced inside this module
+  input  logic                 IntDivE,         // picks the integer bit count when P.IDIV_ON_FPU is set
+  input  logic [P.DIVBLEN-1:0] IntResultBitsE,  // result bits required by an integer operation
+  output logic [P.DURLEN:0]    CyclesE          // number of cycles for the operation
+);
+
+  logic [P.DIVBLEN-1:0] FracBits;    // fractional bits (Nf) of the selected format
+  logic [P.DIVBLEN-1:0] FPBitsE;     // result bits required by a floating-point operation
+  logic [P.DIVBLEN-1:0] TotalBitsE;  // result bits required by the operation being issued
+
+  /* verilator lint_off WIDTH */
+  // Fractional width of the active floating-point format
+  if (P.FPSIZES == 1) begin
+    assign FracBits = P.NF;
+  end else if (P.FPSIZES == 2) begin
+    always_comb
+      case (FmtE)
+        1'b0: FracBits = P.NF1;
+        1'b1: FracBits = P.NF;
+      endcase
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (FmtE)
+        P.FMT:   FracBits = P.NF;
+        P.FMT1:  FracBits = P.NF1;
+        P.FMT2:  FracBits = P.NF2;
+        default: FracBits = 'x; // shouldn't happen
+      endcase
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (FmtE)
+        P.S_FMT: FracBits = P.S_NF;
+        P.D_FMT: FracBits = P.D_NF;
+        P.H_FMT: FracBits = P.H_NF;
+        P.Q_FMT: FracBits = P.Q_NF;
+      endcase
+  end
+
+  // Cycle logic, with k = P.DIVCOPIES, r = P.LOGR = log(R) and rk = P.RK bits produced per cycle:
+  //   Integer division needs p fractional + r integer result bits (supplied as IntResultBitsE)
+  //   FP division needs Nf fractional bits + 2 guard/round bits + one integer digit (r bits) = Nf + 2 + r
+  //   FP sqrt needs Nf fractional bits + 2 guard/round bits; its integer bit starts at 1 and needs no cycle
+  //   Cycles = ceil(bits / rk), evaluated as (bits - 1) / rk + 1
+  assign FPBitsE = FracBits + 2 + P.LOGR; // the same Nf + 2 + r count is applied to every floating-point operation
+
+  if (P.IDIV_ON_FPU) assign TotalBitsE = IntDivE ? IntResultBitsE : FPBitsE;
+  else               assign TotalBitsE = FPBitsE;
+
+  assign CyclesE = (TotalBitsE-1)/(P.RK) + 1; // ceil (TotalBitsE/rk)
+  /* verilator lint_on WIDTH */
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
new file mode 100644
index 000000000..fe1207252
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtexpcalc.sv
@@ -0,0 +1,79 @@
+///////////////////////////////////////////
+// fdivsqrtexpcalc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Exponent caclulation for divide and square root
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0.
You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtexpcalc import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] Fmt,      // floating-point format select
+  input  logic [P.NE-1:0]      Xe, Ye,   // input exponents
+  input  logic                 Sqrt,     // 1: square-root exponent, 0: division exponent
+  input  logic [P.DIVBLEN-1:0] ell, m,   // leading-zero counts applied to Xe and Ye for subnormal inputs
+  output logic [P.NE+1:0]      Ue        // result exponent
+  );
+
+  logic [P.NE-2:0] FmtBias;              // exponent bias of the selected format
+  logic [P.NE+1:0] XeNorm, YeNorm;       // Xe - ell and Ye - m
+  logic [P.NE+1:0] SqrtUnbiased;         // XeNorm - P.BIAS, before halving
+  logic [P.NE+1:0] SqrtExp, DivExp;      // candidate result exponents
+
+  // Pick the exponent bias that matches the format
+  if (P.FPSIZES == 1) begin
+    assign FmtBias = (P.NE-1)'(P.BIAS);
+  end else if (P.FPSIZES == 2) begin
+    assign FmtBias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (Fmt)
+        P.FMT:   FmtBias = (P.NE-1)'(P.BIAS);
+        P.FMT1:  FmtBias = (P.NE-1)'(P.BIAS1);
+        P.FMT2:  FmtBias = (P.NE-1)'(P.BIAS2);
+        default: FmtBias = 'x;
+      endcase
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (Fmt)
+        2'h0: FmtBias = (P.NE-1)'(P.S_BIAS);
+        2'h1: FmtBias = (P.NE-1)'(P.D_BIAS);
+        2'h2: FmtBias = (P.NE-1)'(P.H_BIAS);
+        2'h3: FmtBias = (P.NE-1)'(P.Q_BIAS);
+      endcase
+  end
+
+  // Exponents with the leading-zero counts removed (l and m account for subnorms); arithmetic is modulo 2^(NE+2)
+  assign XeNorm = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell};
+  assign YeNorm = {2'b0, Ye} - {{(P.NE+1-P.DIVBLEN){1'b0}}, m};
+
+  // Square root exponent = (Xe - l - bias) / 2 + bias; the halving replicates the top bit (arithmetic shift)
+  assign SqrtUnbiased = XeNorm - (P.NE+2)'(P.BIAS);
+  assign SqrtExp      = {SqrtUnbiased[P.NE+1], SqrtUnbiased[P.NE+1:1]} + {2'b0, FmtBias};
+
+  // Division exponent = (Xe-l) - (Ye-m) + bias
+  assign DivExp = XeNorm - YeNorm + {3'b0, FmtBias};
+
+  // Select square root or division exponent
+  assign Ue = Sqrt ? SqrtExp : DivExp;
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
new file mode 100644
index 000000000..87b2ccd0b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
@@ -0,0 +1,116 @@
+///////////////////////////////////////////
+// fdivsqrtpostproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root postprocessing
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0.
You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpostproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk, reset,
+  input  logic                 StallM,                 // holds the E/M register when set
+  input  logic [P.DIVb+3:0]    WS, WC,                 // Q4.DIVb
+  input  logic [P.DIVb+3:0]    D,                      // Q4.DIVb
+  input  logic [P.DIVb:0]      FirstU, FirstUM,        // U1.DIVb
+  input  logic [P.DIVb+1:0]    FirstC,                 // Q2.DIVb
+  input  logic                 SqrtE,
+  input  logic                 SqrtM, SpecialCaseM,
+  input  logic [P.XLEN-1:0]    AM,                     // U/Q(XLEN.0)
+  input  logic                 RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM,
+  input  logic [P.DIVBLEN-1:0] IntNormShiftM,          // integer normalization shift amount
+  input  logic [P.XLEN-1:0]    PreIntResultM,          // not read: the normalization shift is done internally; kept for interface compatibility
+  output logic [P.DIVb:0]      UmM,                    // U1.DIVb result significand
+  output logic                 WZeroE,                 // early-termination flag from earlyterm
+  output logic                 DivStickyM,             // set when the result is inexact and not a special case
+  output logic [P.XLEN-1:0]    FIntDivResultM,         // U/Q(XLEN.0); tied to 0 when P.IDIV_ON_FPU is 0
+  output logic [P.INTDIVb+3:0] PreResultM              // integer quotient/remainder before the normalization shift; 0 when P.IDIV_ON_FPU is 0
+);
+
+  logic [P.DIVb+3:0]    Sum;                           // WC + WS
+  logic [P.INTDIVb+3:0] W;                             // truncated Sum after arithrightshift
+  logic [P.DIVb:0]      PreUmM;                        // FirstU or FirstUM, chosen by NegStickyM
+  logic                 NegStickyM;                    // sign bit of Sum
+  logic                 WZeroM;                        // WZeroE registered into the M stage
+  logic [P.XLEN-1:0]    IntDivResultM;                 // integer result after special-case handling
+  logic                 NegQuotM;                      // Integer quotient is negative
+
+  //////////////////////////
+  // Execute Stage: Detect early termination for an exact result
+  //////////////////////////
+
+  // check for early termination on an exact result.
+  divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS, .WZeroE);
+
+  //////////////////////////
+  // E/M Pipeline register
+  //////////////////////////
+
+  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);
+
+  //////////////////////////
+  // Memory Stage: Postprocessing
+  //////////////////////////
+
+  // If the result is not exact, the sticky should be set
+  assign DivStickyM = ~WZeroM & ~SpecialCaseM;
+
+  // Determine if sticky bit is negative  *** Full sum only needed for Integer
+  assign Sum = WC + WS;
+  assign NegStickyM = Sum[P.DIVb+3];
+  mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
+  mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
+
+  // Integer quotient or remainder correction, normalization, and special cases
+  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
+    logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemDM;
+    logic signed [P.INTDIVb+3:0] PreResultShiftedM, SignedIntResultM; // locals must not reuse port names: a redeclaration here hides the port and leaves it undriven
+    logic [P.INTDIVb+3:0] DTrunc, SumTrunc;
+
+    assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
+    assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb];
+    arithrightshift #(P) rshift(SumTrunc, W);
+
+    assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};
+    // Integer remainder: sticky and sign correction muxes
+    assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
+    mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);
+
+    // Select quotient or remainder and do normalization shift; the selected value drives the PreResultM output port
+    mux2 #(P.INTDIVb+4) presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
+    intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
+    mux2 #(P.INTDIVb+4) preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), SignedIntResultM);
+
+    divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,SignedIntResultM,IntDivResultM);
+    // sign extend result for W64
+    if (P.XLEN==64) begin
+      mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
+        {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
+        W64M, FIntDivResultM);
+    end else
+      assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
+  end else begin:nointpostproc // Int not supported: tie off the integer outputs so they are never left undriven
+    assign FIntDivResultM = '0;
+    assign PreResultM = '0;
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
new file mode 100644
index 000000000..f39eb7ed5
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
@@ -0,0 +1,250 @@
+///////////////////////////////////////////
+// fdivsqrtpreproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Preprocessing for the unified divide/remainder/sqrt recurrence unit.
+//   - (P.IDIV_ON_FPU) takes absolute values of the integer operands, with W64 handling on XLEN==64
+//   - selects integer or floating-point operands and left-normalizes them with leading-zero counts
+//   - (P.IDIV_ON_FPU) right-shifts the integer dividend to a digit boundary and flags special cases
+//   - forms the initial X for division or square root and registers the divisor D
+//   - computes the result exponent and the number of FSM cycles
+//   - (P.IDIV_ON_FPU) registers integer control/sign information on IFDivStartE
+//
+// NOTE(review): SIGNOVERFLOWM and ZeroDiffM are declared as outputs but are not driven in this
+//   module; W64M is driven only when P.IDIV_ON_FPU and P.XLEN==64; the other *M integer outputs
+//   are driven only when P.IDIV_ON_FPU.
+module divremsqrtfdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 IFDivStartE,
+  input  logic [P.NF:0]        Xm, Ym,                   // Floating-point significands
+  input  logic [P.NE-1:0]      Xe, Ye,                   // Floating-point exponents
+  input  logic [P.FMTBITS-1:0] FmtE,
+  input  logic                 SqrtE,
+  input  logic                 XZeroE,
+  input  logic [2:0]           Funct3E,
+  output logic [P.NE+1:0]      UeM,                      // biased exponent of result
+  output logic [P.DIVb+3:0]    X, D,                     // Q4.DIVb
+  // Int-specific
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
+  input  logic                 IntDivE, W64E,
+  // Outputs
+  output logic                 ISpecialCaseE,
+  output logic [P.DURLEN:0]    CyclesE,
+  output logic [P.DIVBLEN-1:0] IntNormShiftM,
+  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM,
+  output logic                 AsM, BsM, BZeroM,
+  output logic [P.XLEN-1:0]    AM
+);
+
+  logic [P.DIVb:0]             Xnorm, Dnorm;
+  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
+  logic [P.NE+1:0]             UeE;                      // Result Exponent (FP only)
+  logic [P.DIVb:0]             IFX, IFD;                 // Correctly-sized inputs for iterator, selected from int or fp input
+  logic [P.DIVBLEN-1:0]        mE, ell;                  // Leading zeros of inputs
+  logic [P.DIVBLEN-1:0]        IntResultBitsE;           // bits in integer result
+  logic                        NumerZeroE;               // Numerator is zero (X or A)
+  logic                        SIGNOVERFLOWE;
+  logic                        AZeroE, BZeroE;           // A or B is Zero for integer division
+  logic                        SignedDivE;               // signed division
+  logic                        AsE, BsE;                 // Signs of integer inputs
+  logic [P.XLEN-1:0]           AE;                       // input A after W64 adjustment
+  logic                        ALTBE;
+  logic                        EvenExp;
+
+  logic [$clog2(P.RK):0]       RightShiftX;
+  logic [P.DIVBLEN-1:0]        ZeroDiff, p;
+
+
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
+
+  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
+    logic [P.XLEN-1:0] BE, PosA, PosB;
+
+    // Extract inputs, signs, zero, depending on W64 mode if applicable
+    assign SignedDivE = ~Funct3E[0];
+
+    // Source handling
+    if (P.XLEN==64) begin // 64-bit, supports W64
+      // in W64 mode the low 32 bits are extended with the sign bit (signed) or with zeros (unsigned)
+      mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
+    end else begin // 32 bits only
+      assign AE = ForwardedSrcAE;
+      assign BE = ForwardedSrcBE;
+    end
+    assign AZeroE = ~(|AE);
+    assign BZeroE = ~(|BE);
+    assign AsE = AE[P.XLEN-1] & SignedDivE;
+    assign BsE = BE[P.XLEN-1] & SignedDivE;
+
+    // Force integer inputs to be positive
+    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
+    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
+
+    // Select integer or floating point inputs
+    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
+    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
+    mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+  end else begin // Int not supported
+    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
+    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
+    assign NumerZeroE = XZeroE;
+  end
+
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
+  // count leading zeros for Subnorm FP and to normalize integer inputs
+  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
+  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);
+
+  // Normalization shift: shift leading one into most significant bit
+  assign Xnorm = (IFX << ell);
+  assign Dnorm = (IFD << mE);
+
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  // Determine DivXShifted (X shifted to digit boundary)
+  // and nE (number of fractional digits)
+  //////////////////////////////////////////////////////
+
+  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
+
+  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
+
+    // calculate number of result bits
+    assign ZeroDiff = mE - ell;                    // Difference in number of leading zeros
+    assign ALTBE = ZeroDiff[P.DIVBLEN-1];          // A less than B (A has more leading zeros)
+    assign SIGNOVERFLOWE = 1'b0;
+
+    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
+
+    /* verilator lint_off WIDTH */
+    assign IntResultBitsE = P.LOGR + p;            // Total number of result bits (r integer bits plus p fractional bits)
+
+    /* verilator lint_on WIDTH */
+
+    // Integer special cases (terminate immediately)
+    assign ISpecialCaseE = BZeroE | ALTBE;
+
+    // calculate right shift amount RightShiftX to complete in discrete number of steps
+    if (P.RK > 1) begin // more than 1 bit per cycle
+
+      // Pragma spelling fixed (was "lint_offf"), so the WIDTH waiver below is actually applied
+      // and is paired with the lint_on that follows.
+      /* verilator lint_off WIDTH */
+      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
+      assign DivXShifted = DivX >> RightShiftX;                      // shift X by up to R*K-1 to complete in n steps
+      /* verilator lint_on WIDTH */
+    end else begin // radix 2 1 copy doesn't require shifting
+      assign DivXShifted = DivX;
+      assign RightShiftX = 0;
+    end
+  end else begin
+    assign ISpecialCaseE = 0;
+  end
+
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // Extend to Q4.b format
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
+  //////////////////////////////////////////////////////
+
+
+  // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
+  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
+  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
+  // Now (X-1) is negative.  Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
+  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
+  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
+  // Subtracting 2 is equivalent to adding 1110.  Subtracting 4 is equivalent to adding 1100.  Prepend leading 1s to do a free subtraction.
+  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
+  // Radix    Exponent odd          Exponent Even
+  // 2        x-2 = 2(x/2 - 1)      x/2 - 2 = 2(x/4 - 1)
+  // 4        2(x)-4 = 4(x/2 - 1))  2(x/2)-4 = 4(x/4 - 1)
+  // Summary: PreSqrtX = r(x/2or4 - 1)
+  // NOTE(review): the active code below zero-extends and right-shifts Xnorm by 1 or 2 bits; the
+  //   leading-1s form described above appears only in the commented-out block.
+
+  logic [P.DIVb:0] PreSqrtX; // NOTE(review): not referenced by active code in this module
+  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
+  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
+
+/*
+  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
+  // This saves one bit in DIVb because there is no initial right shift.
+  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
+  // That is an optimization for another day.
+  if (P.RADIX == 2) begin
+    logic [P.DIVb:0] PreSqrtX; // U1.DIVb
+    mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
+    assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
+  end else begin
+    logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
+    mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
+    assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
+  end
+*/
+
+  // Initialize X for division or square root
+  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
+
+  //////////////////////////////////////////////////////
+  // Select integer or floating-point operands
+  //////////////////////////////////////////////////////
+  if (P.IDIV_ON_FPU) begin
+    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
+
+  // Divisor register
+  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
+
+  // Floating-point exponent
+  divremsqrtfdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
+  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
+
+  // Number of FSM cycles (to FSM)
+  divremsqrtfdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
+
+  if (P.IDIV_ON_FPU) begin:intpipelineregs
+    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
+    logic                 RemOpE;
+
+    /* verilator lint_off WIDTH */
+    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  rn = Cycles * r * k - r ***explain
+    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
+    /* verilator lint_on WIDTH */
+    assign RemOpE = Funct3E[1];
+    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
+
+    // pipeline registers, all enabled by IFDivStartE
+    flopen #(1)         mdureg(clk, IFDivStartE, IntDivE, IntDivM);
+    flopen #(1)         altbreg(clk, IFDivStartE, ALTBE, ALTBM);
+    flopen #(1)         bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
+    flopen #(1)         asignreg(clk, IFDivStartE, AsE, AsM);
+    flopen #(1)         bsignreg(clk, IFDivStartE, BsE, BsM);
+    flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
+    flopen #(P.XLEN)    srcareg(clk, IFDivStartE, AE, AM);
+    if (P.XLEN==64)
+      flopen #(1)       w64reg(clk, IFDivStartE, W64E, W64M);
+  end
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..dc480637b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,183 @@
+
+///////////////////////////////////////////
+// flags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. 
See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Floating-point exception flag generation for the divide/sqrt postprocessor.
+// Produces the individual DivByZero, Overflow and Invalid flags (also used by the caller to
+// select results) and the packed PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact}.
+// NOTE(review): Plus1 is not read anywhere in this module.
+module divremsqrtflags import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 Xs,                       // X sign
+  input  logic [P.FMTBITS-1:0] OutFmt,                   // output format
+  input  logic                 InfIn,                    // is an Inf input being used
+  input  logic                 XInf, YInf,               // inputs are infinity
+  input  logic                 NaNIn,                    // is a NaN input being used
+  input  logic                 XSNaN, YSNaN,             // inputs are signaling NaNs
+  input  logic                 XZero, YZero,             // inputs are zero
+  input  logic [P.NE+1:0]      FullRe,                   // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                       // exponent of the normalized sum
+  // rounding
+  input  logic                 Plus1,                    // do you add one for rounding
+  input  logic                 Round, Guard, Sticky,     // bits used to determine rounding
+  input  logic                 UfPlus1,                  // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                 DivOp,                    // is this a divide/sqrt operation (gates DivInvalid and DivByZero below)
+  input  logic                 Sqrt,                     // Sqrt?
+  // flags
+  output logic                 DivByZero,                // divide by zero flag
+  output logic                 Overflow,                 // overflow flag to select result
+  output logic                 Invalid,                  // invalid flag to select the result
+  output logic [4:0]           PostProcFlg               // flags
+);
+
+  logic                        SigNaN;                   // is an input a signaling NaN
+  logic                        Inexact;                  // final inexact flag
+  logic                        FpInexact;                // floating point inexact flag
+  logic                        DivInvalid;               // divide/sqrt invalid flag
+  logic                        Underflow;                // Underflow flag
+  logic                        ResExpGteMax;             // is the result greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent or
+  // the shift amount is greater than the integers size (for cvt to int)
+  // NOTE(review): the ShiftGtIntSz description below refers to convert-to-int logic that is not
+  //   present in this module; only ResExpGteMax is computed here.
+  // ShiftGtIntSz calculation:
+  //      a left shift of intlen+1 is still in range but any more than that is an overflow
+  //             inital: | 64 0's | XLEN |
+  //                     | 64 0's | XLEN | << 64
+  //                     | XLEN | 00000... |
+  //      65 = ...0 0 0 0 0 1 0 0 0 0 0 1
+  //           | or | | or |
+  //      33 = ...0 0 0 0 0 0 1 0 0 0 0 1
+  //           | or | | or |
+  //      larger or equal if:
+  //          - any of the bits after the most significant 1 is one
+  //          - the most significant in 65 or 33 is still a one in the number and
+  //            one of the later bits is one
+  // For each supported format: all exponent bits of that format are ones, or any higher bit
+  // (up to bit NE of FullRe) is set.
+  if (P.FPSIZES == 1) begin
+    assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+
+  end else if (P.FPSIZES == 2) begin
+    assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+        P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+        P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
+        default: ResExpGteMax = 1'bx;
+      endcase
+
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+        P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+        P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+        P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
+      endcase
+  end
+
+
+  // calculate overflow flag:
+  //                 if the result is greater than or equal to the max exponent (not taking into account sign)
+  //                 |           and the exponent isn't negative
+  //                 |           |                   if the input isn't infinity or NaN, and this isn't a divide by zero
+  //                 |           |                   |
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //                  the exponent is negative
+  //                  |          the result is subnormal
+  //                  |          |           the result is normal and rounded from a Subnorm
+  //                  |          |           |           and if given an unbounded exponent the result does not round
+  //                  |          |           |           |           and if the result is not exact
+  //                  |          |           |           |           |           and if the input isn't infinity or NaN
+  //                  |          |           |           |           |           |
+  //assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - Don't set the inexact flag for Inf/NaN inputs, divide by zero, invalid, or a zero X input
+  //assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
+
+  //                  if the res is too small to be represented and not 0
+  //                  |                                     and if the res is not invalid (outside the integer bounds)
+  //                  |                                     |
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) any input is a signaling NaN
+  //   2) divide: Inf / Inf or 0 / 0
+  //   3) sqrt: negative, non-zero, non-NaN input
+
+
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+
+  //invalid flag for division
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0
+  //  - don't set flag if an input is NaN or Inf (IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - to integer results do not set the underflow or overflow flags
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtintspecialcase.sv b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
new file mode 100644
index 000000000..ff1519aad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
@@ -0,0 +1,15 @@
+// Integer divide/remainder special-case selection.
+//   BZeroM (divide by zero): remainder op returns AM, quotient op returns all ones.
+//   ALTBM  (numerator small): remainder op returns AM, quotient op returns 0.
+//   otherwise:               the low XLEN bits of PreIntResultM.
+// BZeroM takes priority over ALTBM.
+module divremsqrtintspecialcase import cvw::*;  #(parameter cvw_t P) (
+  input  logic                      BZeroM,RemOpM, ALTBM,
+  input  logic [P.XLEN-1:0]         AM,
+  input  signed [P.INTDIVb+3:0]     PreIntResultM,
+  output logic [P.XLEN-1:0]         IntDivResultM
+);
+always_comb
+  if (BZeroM) begin         // Divide by zero
+    if (RemOpM) IntDivResultM = AM;
+    else        IntDivResultM = {(P.XLEN){1'b1}};
+  end else if (ALTBM) begin // Numerator is small
+    if (RemOpM) IntDivResultM = AM;
+    else        IntDivResultM = 0;
+  end else      IntDivResultM = PreIntResultM[P.XLEN-1:0];
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtlzc.sv b/src/fpu/divremsqrt/divremsqrtlzc.sv
new file mode 100644
index 000000000..1fa14405b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtlzc.sv
@@ -0,0 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Leading Zero Counter
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. 
See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Leading zero counter: ZeroCnt is the number of 0 bits above the most significant 1 of num.
+//   - When num is all zeros the loop ends with i == WIDTH; ZeroCnt then holds the low
+//     $clog2(WIDTH) bits of WIDTH (this wraps to 0 when WIDTH is a power of two).
+//   - NOTE(review): with the default WIDTH = 1, $clog2(WIDTH) is 0 and the ZeroCnt range becomes
+//     [-1:0]; the instances visible in this patch use WIDTH = P.DIVb+1.
+module divremsqrtlzc #(parameter WIDTH = 1) (
+  input  logic [WIDTH-1:0]            num,    // number to count the leading zeroes of
+  output logic [$clog2(WIDTH)-1:0]    ZeroCnt // the number of leading zeroes
+);
+
+  integer i;
+
+  always_comb begin
+    i = 0;
+    // Short-circuit && so num[WIDTH-1-i] is not evaluated once i reaches WIDTH (index -1 is
+    // out of range). The original bitwise & evaluated the out-of-range select and relied on
+    // 0 & x == 0 to terminate.
+    while ((i < WIDTH) && !num[WIDTH-1-i]) i = i+1;  // search for leading one
+    ZeroCnt = i[$clog2(WIDTH)-1:0];
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtnormshift.sv b/src/fpu/divremsqrt/divremsqrtnormshift.sv
new file mode 100644
index 000000000..4fc51b4ad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtnormshift.sv
@@ -0,0 +1,81 @@
+///////////////////////////////////////////
+// normshift.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: normalization shifter
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+//////////////////////////////////////////////////////////////////////////////////////////////// + + // convert shift + // fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp + // process: + // - start - CalcExp = 1 + XExp - Largest Bias + // | `XLEN zeros | Mantissa | 0's if necessary | + // + // - shift left 1 (1) + // | `XLEN-1 zeros |bit| frac | 0's if necessary | + // . <- binary point + // + // - shift left till unbiased exponent is 0 (XExp - Largest Bias) + // | 0's | Mantissa | 0's if necessary | + // | keep | + // + // fp -> fp: + // - if result is subnormal or underflowed: + // | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1 + // process: + // - start + // | mantissa | 0's | + // + // - shift right by NF-1 (NF-1) + // | `NF-1 zeros | mantissa | 0's | + // + // - shift left by CalcExp = XExp - Largest bias + new bias + // | 0's | mantissa | 0's | + // | keep | + // + // - if the input is subnormal: + // | lzcIn | 0's if necessary | << ZeroCnt+1 + // - plus 1 to shift out the first 1 + // + // int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1 + // - plus 1 to shift out the first 1 + + // fma shift + // | 00 | Sm | << LZA output + // . + // - two extra bits so we can correct for an LZA error of 1 or 2 + + // divsqrt shift + // | Nf 0's | Qm | << calculated shift amount + // . 
+
+// Normalization shifter: logical left shift of ShiftIn by ShiftAmt bit positions.
+// The result keeps the P.NORMSHIFTSZDRSU-bit width of the Shifted port.
+module divremsqrtnormshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt,   // shift amount
+  input  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn,    // number to be shifted
+  output logic [P.NORMSHIFTSZDRSU-1:0]    Shifted     // shifted result
+);
+
+  // purely combinational shift
+  always_comb begin
+    Shifted = ShiftIn << ShiftAmt;
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..661e48c81
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,177 @@
+///////////////////////////////////////////
+// postprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Divide/sqrt-only floating-point postprocessor: shift calculation, normalization shift,
+// shift correction, rounding, flag generation and special-case result selection.
+// Every submodule instance below is connected with .DivOp(1'b1).
+//
+// NOTE(review): Ys-independent inputs IntNormShiftM, PreResultM and IntDivM are not read in this
+//   module, and the PreIntResultM output is not driven here.
+// NOTE(review): the local DivOp (decoded from PostProcSel) is assigned but not used by any
+//   instance, and the conversion/readability signals (Cvt*, IntInvalid, Mult, Int64, Signed,
+//   IntToFp, CvtOp) are declared but never assigned or read.
+// NOTE(review): OutFmt is assigned only when P.FPSIZES is 2, 3 or 4.
+module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
+  // general signals
+  input  logic                      Xs, Ys,       // input signs
+  input  logic [P.NF:0]             Xm, Ym,       // input mantissas
+  input  logic [2:0]                Frm,          // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+  input  logic [P.FMTBITS-1:0]      Fmt,          // precision 1 = double 0 = single
+  input  logic [3:0]                OpCtrl,       // choose which operation; only OpCtrl[0] (Sqrt) is read here
+  input  logic                      XZero, YZero, // inputs are zero
+  input  logic                      XInf, YInf,   // inputs are infinity
+  input  logic                      XNaN, YNaN,   // inputs are NaN
+  input  logic                      XSNaN, YSNaN, // inputs are signaling NaNs
+  input  logic [1:0]                PostProcSel,  // select result to be written to fp register
+  //fma signals
+  //divide signals
+  input  logic                      DivSticky,    // divider sticky bit
+  input  logic [P.NE+1:0]           DivUe,        // divsqrt exponent
+  input  logic [P.NF+2:0]           DivUm,        // divsqrt significand
+  input  logic [P.DIVBLEN-1:0]      IntNormShiftM, // integer normalization left-shift amount (after pre-shifting right)
+  input  logic [P.INTDIVb+3:0]      PreResultM,   // integer result to be shifted
+  input  logic                      IntDivM,
+  // final results
+  output logic [P.FLEN-1:0]         PostProcRes,  // postprocessor final result
+  output logic [4:0]                PostProcFlg,  // postprocessor flags
+  output logic [P.XLEN-1:0]         PreIntResultM // normalized integer result
+  );
+
+
+  // general signals
+  logic                             Rs;           // result sign
+  logic [P.NF-1:0]                  Rf;           // Result fraction
+  logic [P.NE-1:0]                  Re;           // Result exponent
+  logic                             Ms;           // norMalized sign
+  logic [P.NORMSHIFTSZDRSU-1:0]     Mf;           // norMalized fraction
+  logic [P.NE+1:0]                  Me;           // normalized exponent
+  logic [P.NE+1:0]                  FullRe;       // Re with bits to determine sign and overflow
+  logic                             UfPlus1;      // do you add one (for determining underflow flag)
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]  ShiftAmt;     // normalization shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]     ShiftIn;      // input to normalization shift
+  logic [P.NORMSHIFTSZDRSU-1:0]     Shifted;      // the output of the normalized shifter (before shift correction)
+  logic                             Plus1;        // add one to the final result?
+  logic                             Overflow;     // overflow flag used to select results
+  logic                             Invalid;      // invalid flag used to select results
+  logic                             Guard, Round, Sticky; // bits needed to determine rounding
+  logic [P.FMTBITS-1:0]             OutFmt;       // output format
+  // division signals
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]  DivShiftAmt;  // divsqrt shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]     DivShiftIn;   // divsqrt shift input
+  logic [P.NE+1:0]                  Ue;           // divsqrt corrected exponent after correction shift
+  logic                             DivByZero;    // divide by zero flag
+  logic                             DivResSubnorm; // is the divsqrt result subnormal
+  logic                             DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+  // conversion signals
+  logic [P.CVTLEN+P.NF:0]           CvtShiftIn;   // number to be shifted for converter
+  logic [1:0]                       CvtNegResMsbs; // most significant bits of possibly negated int result
+  logic [P.XLEN+1:0]                CvtNegRes;    // possibly negated integer result
+  logic                             CvtResUf;     // did the convert result underflow
+  logic                             IntInvalid;   // invalid integer flag
+  // readability signals
+  logic                             Mult;         // multiply operation
+  logic                             Sqrt;         // is the divsqrt operation sqrt
+  logic                             Int64;        // is the integer 64 bits?
+  logic                             Signed;       // is the operation with a signed integer?
+  logic                             IntToFp;      // is the operation an int->fp conversion?
+  logic                             CvtOp;        // conversion operation
+  logic                             DivOp;        // divider operation
+  logic                             InfIn;        // are any of the inputs infinity
+  logic                             NaNIn;        // are any of the inputs NaN
+
+  // signals to help readability
+
+  assign DivOp = (PostProcSel == 2'b01);
+  assign Sqrt =  OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf;
+  assign NaNIn = XNaN|YNaN;
+
+  // choose the output format
+  //      - this divide/sqrt-only unit always uses Fmt; the convert-aware selections are left
+  //        commented out for reference
+  if (P.FPSIZES == 2)
+    //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
+    assign OutFmt = Fmt;
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4)
+    //assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
+    assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting
+
+  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  // only the divsqrt shift source exists in this unit, so it is passed straight through
+  assign ShiftAmt = DivShiftAmt;
+  assign ShiftIn = DivShiftIn;
+
+  // main normalization shift
+  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // correct for LZA/divsqrt error
+  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+
+  // calculate result sign used in rounding unit
+  divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);
+
+  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
+    .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // the result sign is the sign produced by roundsign
+  assign Rs = Ms;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
+    .Xs, .OutFmt, .Sqrt,
+    .NaNIn, .Round, .DivByZero,
+    .Guard, .Sticky, .UfPlus1,.DivOp(1'b1), .FullRe, .Plus1,
+    .Me, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
+    .Frm, .OutFmt, .XNaN, .YNaN,
+    .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+    .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..428288783
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,268 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu, me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+// Rounder for the divide/sqrt postprocessor.
+// Takes the normalized fraction Mf and exponent Ue, extracts the Lsb/Guard/Round/Sticky bits at
+// the position selected by OutFmt, decides from Frm and Ms whether to add one ulp, and adds that
+// ulp to {Me, RoundFrac} so that a fraction carry-out increments the exponent.
+module divremsqrtround import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0]         OutFmt,     // output format
+  input  logic [2:0]                   Frm,        // rounding mode
+  input  logic                         Ms,         // normalized sign
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Mf,         // normalized fraction
+  // divsqrt
+  input  logic                         DivOp,      // is a division operation being done
+  input  logic                         DivSticky,  // divsqrt sticky bit
+  input  logic [P.NE+1:0]              Ue,         // the divsqrt calculated exponent
+  // outputs
+  output logic [P.NE+1:0]              Me,         // normalized exponent (assigned directly from Ue below)
+  output logic                         UfPlus1,    // do you add one to the result if given an unbounded exponent
+  output logic [P.NE+1:0]              FullRe,     // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]              Re,         // Result exponent
+  output logic [P.NF-1:0]              Rf,         // Result fraction
+  output logic                         Sticky,     // sticky bit
+  output logic                         Plus1,      // do you add one to the final result
+  output logic                         Round, Guard // bits needed to calculate rounding
+);
+
+  logic                UfCalcPlus1;      // calculated plus one for unbounded exponent
+  logic                NormSticky;       // sticky bit computed from the discarded bits of Mf
+  logic [P.NF-1:0]     RoundFrac;        // top NF bits of Mf, before the rounding increment is added
+  logic                FpGuard, FpRound; // floating point round/guard bits
+  logic                FpLsbRes;         // least significant bit of floating point result
+  logic                LsbRes;           // lsb of result
+  logic                CalcPlus1;        // calculated plus1
+  logic                FpPlus1;          // do you add one to the fp result
+  logic [P.FLEN:0]     RoundAdd;         // how much to add to the result
+
+// what position is XLEN in?
+//  options:
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  //      {Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+  //           - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //         - plus 1 otherwise
+
+  //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to -infinity
+  //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to infinity
+  //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  //  round to nearest max magnitude
+  //      {Guard, Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //           - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //         - Plus 1 otherwise
+
+
+  // determine what format the final result is in: int or fp
+
+  // sticky bit calculation
+  // - OR together every bit of Mf below the round bit of the selected format
+  // - for FPSIZES 1-3 the lowest slice (below the largest format's round bit) is always included
+  if (P.FPSIZES == 1) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 2) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+
+  end else if (P.FPSIZES == 3) begin
+
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 4) begin
+    // NOTE(review): unlike the branches above, the lowest slice Mf[NORMSHIFTSZDRSU-Q_NF-2:0] is only
+    // included when OutFmt==Q_FMT, so for H/S/D outputs those bits never reach the sticky bit.
+    // Confirm this is intentional (e.g. those bits are known to be zero for narrower formats).
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.S_FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.D_FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]&(OutFmt==P.Q_FMT));
+  end
+
+
+
+  // final sticky bit: the divider's sticky (only when DivOp is set) ORed with the sticky from Mf
+  //assign Sticky = DivSticky&DivOp | NormSticky | StickySubnorm;
+  assign Sticky = DivSticky&DivOp | NormSticky;
+  //assign Sticky = DivSticky&DivOp;
+
+
+
+
+  // determine guard, round and LSB of the rounded value for the selected format
+  //      - relative to the top of Mf: LSB is NF bits down, guard is one below it, round one below guard
+  //      - the round bit is used for the unbounded-exponent (underflow) increment UfPlus1
+  if (P.FPSIZES == 1) begin
+    assign FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+    assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+    assign FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+
+  end else if (P.FPSIZES == 2) begin
+    assign FpGuard  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+    assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF] : Mf[P.NORMSHIFTSZDRSU-P.NF1];
+    assign FpRound  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+        end
+        P.FMT1: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+        end
+        P.FMT2: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
+        end
+        default: begin // unused format encoding: propagate x
+          FpGuard  = 1'bx;
+          FpLsbRes = 1'bx;
+          FpRound  = 1'bx;
+        end
+      endcase
+  end else if (P.FPSIZES == 4) begin
+    // the case labels are literal OutFmt encodings; each arm uses the Q_/D_/S_/H_ fraction width shown
+    always_comb
+      case (OutFmt)
+        2'h3: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
+        end
+        2'h1: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
+        end
+        2'h0: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
+        end
+        2'h2: begin
+          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
+          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
+        end
+      endcase
+  end
+
+
+  // there is no integer path in this rounder, so the fp bits are used directly
+  assign Guard  = FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round  = FpRound;
+
+
+  always_comb begin
+    // Determine if you add 1
+    case (Frm)
+      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+      3'b001: CalcPlus1 = 0;//round to zero
+      3'b010: CalcPlus1 = Ms;//round down
+      3'b011: CalcPlus1 = ~Ms;//round up
+      3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+      default: CalcPlus1 = 1'bx;
+    endcase
+    // Determine if you add 1 (for underflow flag)
+    // - same decision taken one bit position lower: Round acts as the guard, Guard as the lsb
+    case (Frm)
+      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+      3'b001: UfCalcPlus1 = 0;//round to zero
+      3'b010: UfCalcPlus1 = Ms;//round down
+      3'b011: UfCalcPlus1 = ~Ms;//round up
+      3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+      default: UfCalcPlus1 = 1'bx;
+    endcase
+
+  end
+
+  // If an answer is exact don't round
+  assign Plus1   = CalcPlus1 & (Sticky|Round|Guard);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format
+  // - RoundAdd is FLEN+1 bits wide, matching {Me, RoundFrac}; the single set bit lines up with the
+  //   LSB of the selected format's fraction
+  if (P.FPSIZES == 1) begin
+    assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
+
+  end else if (P.FPSIZES == 2) begin
+    //                      \/FLEN+1
+    //  | NE+2 |        NF      |
+    //  '-NE+2-^----NF1----^
+    // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+    assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (P.FPSIZES == 3) begin
+    assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
+
+  end else if (P.FPSIZES == 4)
+    assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction
+  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];
+
+
+
+  // select the exponent (only the divsqrt exponent exists in this unit)
+  assign Me = Ue;
+
+
+
+  // round the result
+  //      - if the fraction overflows one should be added to the exponent
+  //      - concatenating exponent and fraction lets the carry out of Rf ripple into FullRe
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[P.NE-1:0];
+
+
+endmodule
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..0f808836a
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,45 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+//
+// Documentation: RISC-V System on Chip Design
Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Computes the sign handed to the rounder.
+//  - division:    sign(X) xor sign(Y)
+//  - square root: sign(X) (Ys is ignored)
+//  - the sign is forced to 0 whenever DivOp is low
+module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
+  input  logic Xs,     // sign of X
+  input  logic Ys,     // sign of Y
+  input  logic Sqrt,   // square root (rather than divide) is being performed
+  input  logic DivOp,  // a divsqrt operation is selected
+  output logic Ms      // sign of the normalized result
+);
+
+  logic YsEff;         // Y sign as seen by the operation: dropped for square root
+  logic DivSqrtSign;   // sign of the divsqrt result
+
+  // square root has a single operand, so Y's sign must not contribute
+  assign YsEff       = Sqrt ? 1'b0 : Ys;
+  assign DivSqrtSign = Xs ^ YsEff;
+
+  // only pass the divsqrt sign on when a divsqrt operation is selected
+  assign Ms = DivOp ? DivSqrtSign : 1'b0;
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..a82756e1e
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,94 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// One- or two-bit left-shift correction of the normalization shifter output for divsqrt results,
+// together with the matching exponent adjustment.
+//  - Mf: Shifted moved left by 1 (msb of Shifted set, or DivUe == 1) or by 2 (otherwise);
+//        Shifted is passed through unchanged when DivResSubnorm is set
+//  - Ue: 0 for a subnormal result whose shift amount was positive, otherwise DivUe minus one
+//        when the msb of Shifted is clear
+module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Shifted,            // output of the normalization shifter, before correction
+  // divsqrt
+  input  logic                         DivOp,              // is it a divsqrt operation (not used by the logic in this module)
+  input  logic                         DivResSubnorm,      // is the divsqrt result subnormal
+  input  logic [P.NE+1:0]              DivUe,              // the divsqrt result's exponent
+  input  logic                         DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
+  // output
+  output logic [P.NORMSHIFTSZDRSU-1:0] Mf,                 // the shifted result after the one/two bit correction
+  output logic [P.NE+1:0]              Ue                  // corrected exponent for divider
+);
+
+  logic [P.NORMSHIFTSZDRSU-1:0] CorrQm0, CorrQm1; // Shifted moved left by two / by one
+  logic [P.NORMSHIFTSZDRSU-1:0] CorrQmShifted;    // the shifted divsqrt result after the correction shift
+  logic                         LZAPlus1;         // msb of Shifted is set (result already has its leading one on top)
+  logic                         LeftShiftQm;      // shift left by one (rather than two)
+
+  // the msb of the shifter output tells whether the quotient was >= 1
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // correct the shifting of the divsqrt caused by producing a result in the [.5, 2) range
+  // condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
+  assign CorrQm0 = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}};
+  assign CorrQm1 = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};
+  // LeftShiftQm selects the second data input (CorrQm1); mux2 is defined elsewhere in the project
+  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+
+  // if the result of the divider was calculated to be subnormal, then the result was correctly
+  // normalized, so select the uncorrected shifter output
+  always_comb
+    if (~DivResSubnorm) Mf = CorrQmShifted;
+    else                Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
+  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..975c6de3c
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,240 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Final result selection for the divide/sqrt postprocessor.
+// Builds, for the format selected by OutFmt, the candidate results (NaN, invalid, overflow,
+// underflow, normal) and picks one by priority at the bottom of the module. Formats narrower
+// than FLEN have their upper FLEN-LEN bits filled with ones.
+module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
+  input  logic                   Xs,                // X sign
+  input  logic [P.NF:0]          Xm, Ym,            // input significands
+  input  logic                   XNaN, YNaN,        // are the inputs NaN
+  input  logic [2:0]             Frm,               // rounding mode
+  input  logic [P.FMTBITS-1:0]   OutFmt,            // output format
+  input  logic                   InfIn,             // are any inputs infinity
+  input  logic                   NaNIn,             // are any inputs NaN
+  input  logic                   XInf, YInf,        // are X or Y infinity
+  input  logic                   XZero,             // is X zero
+  input  logic                   Plus1,             // do you add one for rounding
+  input  logic                   Rs,                // the result's sign
+  input  logic                   Invalid, Overflow, // flags to choose the result
+  input  logic [P.NE-1:0]        Re,                // Result exponent
+  input  logic [P.NE+1:0]        FullRe,            // Result full exponent
+  input  logic [P.NF-1:0]        Rf,                // Result fraction
+  // divsqrt
+  input  logic                   DivOp,             // is it a divsqrt operation
+  input  logic                   DivByZero,         // divide by zero flag
+  // outputs
+  output logic [P.FLEN-1:0]      PostProcRes        // final result
+);
+
+  logic [P.FLEN-1:0]   XNaNRes;     // X is NaN result (only assigned when P.IEEE754 is set)
+  logic [P.FLEN-1:0]   YNaNRes;     // Y is NaN result (only assigned when P.IEEE754 is set)
+  logic [P.FLEN-1:0]   InvalidRes;  // Invalid result: positive quiet NaN with a zero payload
+  logic [P.FLEN-1:0]   UfRes;       // underflowed result
+  logic [P.FLEN-1:0]   OfRes;       // overflowed result
+  logic [P.FLEN-1:0]   NormRes;     // normal result
+  logic                OfResMax;    // does the overflow result output the maximum normalized fp number
+  logic                KillRes;     // kill the result for underflow
+  logic                SelOfRes;    // should the overflow result be selected
+
+
+  // does the overflow result output the maximum normalized floating point number
+  // output infinity if the input is infinity
+  // - only Frm[1:0] is examined: 01 always gives max, 10 gives max for a positive result,
+  //   11 gives max for a negative result
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases
+  // - XNaNRes/YNaNRes: the input's significand bits with the top fraction bit forced to 1 and sign 0
+  // - OfRes: largest finite magnitude (exponent all ones except lsb, fraction all ones) or infinity
+  // - UfRes: signed zero, with the lsb set when Plus1 is set in a rounding mode with Frm[1] = 1
+  //          (never when dividing by infinity)
+  if (P.FPSIZES == 1) begin
+    //NaN res selection depending on standard
+    if(P.IEEE754) begin
+      assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+      assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end else begin
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end
+
+    assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+    assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = {Rs, Re, Rf};
+
+  end else if (P.FPSIZES == 2) begin
+    // OutFmt = 1 selects the full-width format, OutFmt = 0 the narrower LEN1 format
+    if(P.IEEE754) begin
+      assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+      assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end else begin
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end
+
+    always_comb
+      if(OutFmt)
+        if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+        else         OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      else
+        if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+        else         OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+    assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        P.FMT1: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+            YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+          UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+        end
+        P.FMT2: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+            YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+          UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
+        end
+        default: begin // unused format encoding: every candidate result is zero
+          if(P.IEEE754) begin
+            XNaNRes = (P.FLEN)'(0);
+            YNaNRes = (P.FLEN)'(0);
+            InvalidRes = (P.FLEN)'(0);
+          end else begin
+            InvalidRes = (P.FLEN)'(0);
+          end
+          OfRes = (P.FLEN)'(0);
+          UfRes = (P.FLEN)'(0);
+          NormRes = (P.FLEN)'(0);
+        end
+      endcase
+
+  end else if (P.FPSIZES == 4) begin
+    // the case labels are literal OutFmt encodings; each arm uses the D_/S_/H_ parameters shown,
+    // and the 2'h3 arm uses the full-width NE/NF/FLEN
+    always_comb
+      case (OutFmt)
+        2'h3: begin
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        2'h1: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+            YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+          UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
+        end
+        2'h0: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+            YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+          UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
+        end
+        2'h2: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+            YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
+          // zero is exact if dividing by infinity so don't add 1
+          UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
+        end
+      endcase
+  end
+
+  // determine if you should kill the result (select UfRes)
+  //      - do so if the top bit of the full exponent is set (FullRe[P.NE+1])
+  //      - or, for a divsqrt operation, if X is zero or a finite X is divided by infinity
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+
+  // calculate if the overflow result should be selected
+  //      - on overflow, divide by zero, or an infinite input (except a divsqrt with Y infinite)
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+
+  // select the final result by priority: NaN / invalid, overflow, underflow, normal
+  //      - output infinity with result sign if divide by zero
+  //      - when P.IEEE754 is clear, any NaN input produces InvalidRes instead of propagating its payload
+  if(P.IEEE754)
+    always_comb
+      if(XNaN)          PostProcRes = XNaNRes;
+      else if(YNaN)     PostProcRes = YNaNRes;
+      else if(Invalid)  PostProcRes = InvalidRes;
+      else if(SelOfRes) PostProcRes = OfRes;
+      else if(KillRes)  PostProcRes = UfRes;
+      else              PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid) PostProcRes = InvalidRes;
+      else if(SelOfRes) PostProcRes = OfRes;
+      else if(KillRes)  PostProcRes = UfRes;
+      else              PostProcRes = NormRes;
+
+endmodule
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..2385cac20
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,102 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0.
You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Top level of the divide/remainder/square-root unit.
+// Instantiates the divremsqrt recurrence unit and feeds its exponent (UeM), the top NF+3 bits of
+// its result (UmM) and its sticky bit (DivStickyM) to divremsqrtpostprocess, which produces the
+// floating-point result FResM and flags FlgM. The integer result FIntDivResultM comes directly
+// from divremsqrt.
+module drsu import cvw::*; #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 reset,
+  input  logic [P.FMTBITS-1:0] FmtE,                  // floating-point format
+  input  logic                 XsE, YsE,              // operand signs
+  input  logic [P.NF:0]        XmE, YmE,              // operand significands
+  input  logic [P.NE-1:0]      XeE, YeE,              // operand exponents
+  input  logic                 XInfE, YInfE,          // operand is infinity
+  input  logic                 XZeroE, YZeroE,        // operand is zero
+  input  logic                 XNaNE, YNaNE,          // operand is NaN
+  input  logic                 XSNaNE, YSNaNE,        // operand is a signaling NaN
+  input  logic                 FDivStartE, IDivStartE,
+  input  logic                 StallM,
+  input  logic                 FlushE,
+  input  logic                 SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]           Funct3E, Funct3M,
+  input  logic                 IntDivE, W64E,
+  input  logic [2:0]           Frm,                   // rounding mode
+  input  logic [3:0]           OpCtrl,
+  input  logic [1:0]           PostProcSel,
+  output logic                 FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.FLEN-1:0]    FResM,                 // floating-point result from the postprocessor
+  output logic [P.XLEN-1:0]    FIntDivResultM,        // integer result from divremsqrt
+  output logic [4:0]           FlgM                   // flags from the postprocessor
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+
+  // NOTE(review): of the signals declared below, only UmM, UmMexact, UeM, DivStickyM, IntDivM,
+  // PreResultM, PreIntResultM and IntNormShiftM are referenced by the two instances in this
+  // module; the others are declared but not connected here.
+  logic [P.DIVb+3:0]      WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]      X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]      D;                            // Iterator Divisor
+  logic [P.DIVb:0]        FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]      FirstC;                       // Step tracker
+  logic                   Firstun;                      // Quotient selection
+  logic                   WZeroE;                       // Early termination flag
+  logic [P.DURLEN-1:0]    CyclesE;                      // FSM cycles
+  logic                   SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                   DivStartE;                    // Enable signal for flops during stall
+
+  // Integer div/rem signals
+  logic                   BZeroM;                       // Denominator is zero
+  logic                   IntDivM;                      // Integer operation
+  logic [P.DIVBLEN:0]     nM, mM;                       // Shift amounts
+  logic                   NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [P.XLEN-1:0]      AM;                           // Original Numerator for postprocessor
+  logic                   ISpecialCaseE;                // Integer div/remainder special cases
+  logic [P.DIVb:0]        UmM;                          // result from divremsqrt
+  logic [P.NF+2:0]        UmMexact;                     // U1.NF+2: top NF+3 bits of UmM
+  logic [P.NE+1:0]        UeM;                          // result exponent from divremsqrt
+  logic                   DivStickyM;                   // sticky bit from divremsqrt
+  logic [P.INTDIVb+3:0]   PreResultM;
+  logic [P.XLEN-1:0]      PreIntResultM;
+  logic [P.DIVBLEN-1:0]   IntNormShiftM;
+
+  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
+                .XeE, .YeE, .SqrtE, .SqrtM,
+                .XInfE, .YInfE, .XZeroE, .YZeroE,
+                .XNaNE, .YNaNE,
+                .FDivStartE, .IDivStartE, .W64E,
+                .StallM, .DivStickyM, .FDivBusyE, .UeM,
+                .UmM,
+                .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
+                .Funct3E, .IntDivE, .FIntDivResultM, .IntDivM,
+                .FDivDoneE, .IFDivStartE, .IntNormShiftM, .PreIntResultM, .PreResultM);
+  assign UmMexact = UmM[P.DIVb:P.DIVb-(P.NF+3-1)]; // grabbing top 1+(NF+2) msbs
+  // the postprocessor receives the E-suffixed operand signals together with the M-suffixed divider outputs
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl, .IntDivM,
+                .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
+                .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
+                .PreIntResultM, .PreResultM, .IntNormShiftM);
+endmodule
+
+
diff --git a/src/fpu/divremsqrt/intrightshift.sv b/src/fpu/divremsqrt/intrightshift.sv
new file mode 100644
index 000000000..dd4f47aeb
--- /dev/null
+++ b/src/fpu/divremsqrt/intrightshift.sv
@@ -0,0 +1,37 @@
+///////////////////////////////////////////
+// intrightshift.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+//
Purpose: Right shifter (shifted = shiftin >> shiftamt) +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module intrightshift import cvw::*; #(parameter cvw_t P) ( + input logic signed [P.INTDIVb+3:0] shiftin, // value to be shifted + input logic [P.DIVBLEN-1:0] shiftamt, // number of bit positions to shift right + output logic signed [P.INTDIVb+3:0] shifted // shiftin shifted right by shiftamt +); + assign shifted = shiftin >> shiftamt; // `>>` is a logical (zero-fill) shift even though the ports are signed. NOTE(review): confirm `>>>` (sign-extending) is not what is intended + +endmodule diff --git a/testbench/common/wallyTracer.sv b/testbench/common/wallyTracer.sv index 80f7af651..5676f6c13 100644 --- a/testbench/common/wallyTracer.sv +++ b/testbench/common/wallyTracer.sv @@ -169,12 +169,17 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); CSRArray[12'h143] = testbench.dut.core.priv.priv.csr.csrs.csrs.STVAL_REGW; CSRArray[12'h142] = testbench.dut.core.priv.priv.csr.csrs.csrs.SCAUSE_REGW; CSRArray[12'h144] = testbench.dut.core.priv.priv.csr.csrm.MIP_REGW & 12'h222 & testbench.dut.core.priv.priv.csr.csrm.MIDELEG_REGW; - CSRArray[12'h14D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW; + CSRArray[12'h14D] = 
testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[P.XLEN-1:0]; // user CSRs CSRArray[12'h001] = testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW; CSRArray[12'h002] = testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW; CSRArray[12'h003] = {testbench.dut.core.priv.priv.csr.csru.csru.FRM_REGW, testbench.dut.core.priv.priv.csr.csru.csru.FFLAGS_REGW}; + if (P.XLEN == 32) begin + CSRArray[12'h310] = testbench.dut.core.priv.priv.csr.csrsr.MSTATUSH_REGW; + CSRArray[12'h31A] = testbench.dut.core.priv.priv.csr.csrm.MENVCFGH_REGW; + CSRArray[12'h15D] = testbench.dut.core.priv.priv.csr.csrs.csrs.STIMECMP_REGW[63:32]; + end end else begin // hold the old value if the pipeline is stalled. // PMP CFG 3A0 to 3AF diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv new file mode 100644 index 000000000..9ca2e5b61 --- /dev/null +++ b/testbench/testbench-fp.sv @@ -0,0 +1,1682 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com, james.stine@okstate.edu +// +// Purpose: Testbench for UCB Testfloat on Wally +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "config.vh" +`include "tests-fp.vh" + +import cvw::*; + +module testbenchfp; + // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir + // to run specific precisions (e.g., quad or all) + parameter TEST="none"; + parameter TEST_SIZE="none"; + + `include "parameter-defs.vh" + + //parameter MAXVECTORS = 8388610; + parameter MAXVECTORS = 100000; + + // FIXME: needs cleaning of unused variables (jes) + string Tests[]; // list of tests to be run + logic [3:0] OpCtrl[]; // list of op controls + logic [2:0] Unit[]; // list of units being tested + logic WriteInt[]; // Is being written to integer register + logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 + //logic [2:0] Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES + logic [1:0] Fmt[]; // list of formats for the other units + + logic clk=0; + logic [31:0] TestNum=0; // index for the test + logic [31:0] OpCtrlNum=0; // index for OpCtrl + logic [31:0] errors=0; // how many errors + logic [31:0] VectorNum=0; // index for test vector + logic [31:0] FrmNum=0; // index for rounding mode + logic [P.Q_LEN*4+7:0] TestVectors[MAXVECTORS:0]; // list of test vectors + + logic [1:0] FmtVal; // value of the current Fmt + logic [2:0] UnitVal, FrmVal; // value of the current Unit/OpCtrl/FrmVal + logic [3:0] OpCtrlVal; + logic WriteIntVal; // value of the current WriteInt + logic [P.FLEN-1:0] X, Y, Z; // inputs read from TestFloat + logic [P.FLEN-1:0] XPostBox; // inputs read from TestFloat + logic [P.XLEN-1:0] SrcA, SrcB; // integer input + logic W64; // is W64 instruction + logic [P.FLEN-1:0] Ans; // correct answer from TestFloat + logic [P.FLEN-1:0] Res; // result from other units + logic [4:0] AnsFlg; // correct flags read from testfloat + logic 
[4:0] ResFlg, Flg; // Result flags + logic [P.FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad + logic [P.FLEN-1:0] FpRes, FpCmpRes; // Results from each unit + logic [P.XLEN-1:0] IntRes, CmpRes; // Results from each unit + logic [4:0] FmaFlg, CvtFlg, DivFlg; // Outputed flags + logic [4:0] CmpFlg; // Outputed flags + logic AnsNaN, ResNaN, NaNGood; + logic Xs, Ys, Zs; // sign of the inputs + logic [P.NE-1:0] Xe, Ye, Ze; // exponent of the inputs + logic [P.NF:0] Xm, Ym, Zm; // mantissas of the inputs + logic XNaN, YNaN, ZNaN; // is the input NaN + logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN + logic XSubnorm, ZSubnorm; // is the input denormalized + logic XInf, YInf, ZInf; // is the input infinity + logic XZero, YZero, ZZero; // is the input zero + logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones + logic [P.CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic IntZero; + logic CvtResSgnE; + logic [P.NE:0] CvtCalcExpE; // the calculated exponent + logic [P.LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [P.DIVb:0] Quot; + logic CvtResSubnormUfE; + logic DivStart=0; + logic FDivBusyE; + logic OldFDivBusyE; + logic reset = 1'b0; + logic [$clog2(P.NF+2)-1:0] XZeroCnt, YZeroCnt; + + // in-between FMA signals + logic Mult; + logic Ss; + logic [P.NE+1:0] Pe; + logic [P.NE+1:0] Se; + logic ASticky; + logic KillProd; + logic [$clog2(3*P.NF+5)-1:0] SCnt; + logic [3*P.NF+3:0] Sm; + logic InvA; + logic NegSum; + logic As; + logic Ps; + logic DivSticky; + logic DivDone; + logic DivNegSticky; + logic [P.NE+1:0] DivCalcExp; + logic divsqrtop; + + // Missing logic vectors fdivsqrt + logic [2:0] Funct3E; + logic [2:0] Funct3M; + logic FlushE; + logic IFDivStartE; + logic IDivStart; + logic FDivDoneE; + logic [P.NE+1:0] UeM; + logic [P.DIVb:0] UmM; + logic [P.XLEN-1:0] FIntDivResultM; + logic ResMatch; // Check if result match + logic FlagMatch; // Check if IEEE flags match 
+ logic CheckNow; // Final check + logic FMAop; // Is this a FMA operation? + logic IntDivE; // Is Integer operation on FPU? + + // FSM for testing each item per clock + typedef enum logic [2:0] {S0, Start, S2, Done} statetype; + statetype state, nextstate; + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||||| |||||||| ||||||| ||||||||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||| ||||||| ||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||| ||||||| ||| ||||||| |||||||| ||||||||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // select tests relevent to the specified configuration + // cvtint - test integer conversion unit (fcvtint) + // cvtfp - test floating-point conversion unit (fcvtfp) + // cmp - test comparison unit's LT, LE, EQ opperations (fcmp) + // add - test addition + // sub - test subtraction + // div - test division + // sqrt - test square root + // all - test all of the above + flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M)); + + initial begin + // Information displayed for user on what is simulating + // $display("\nThe start of simulation..."); + $display("\nThe start of simulation... 
INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK); + // $display("This simulation for TEST is %s", TEST); + if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported + if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion + // add the 128-bit cvtint tests to the to-be-tested list + Tests = {Tests, f128rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported add their conversions + Tests = {Tests, f128rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + // if the floating-point conversions are being tested + if (TEST === "cvtfp" | TEST === "all") begin + if (P.D_SUPPORTED) begin // if double precision is supported + // add the 128 <-> 64 bit conversions to the to-be-tested list + Tests = {Tests, f128f64cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b01, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (P.F_SUPPORTED) begin // if single precision is supported + // add the 128 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f128f32cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (P.ZFH_SUPPORTED) begin // if half precision is supported + // add the 128 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f128f16cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested + // add the compare tests/op-ctrls/unit/fmt + Tests = {Tests, f128cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the addition tests/op-ctrls/unit/fmt + Tests = {Tests, f128add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the subtraction tests/op-ctrls/unit/fmt + Tests = {Tests, f128sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the multiply tests/op-ctrls/unit/fmt + Tests = {Tests, f128mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if 
square-root is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested + Tests = {Tests, f128fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported + if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f64rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + if (P.XLEN == 64) begin // if 64-bit integers are being supported + Tests = {Tests, f64rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested + if (P.F_SUPPORTED) begin // if single precision is supported + // add the 64 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f64f32cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b01}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (P.ZFH_SUPPORTED) begin // if half precision is supported + // add the 64 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f64f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b01}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct 
tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested + Tests = {Tests, f64fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f32rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f32rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = 
{WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested + if (P.ZFH_SUPPORTED) begin + // add the 32 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f32f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b00}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiply is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32mul}; + OpCtrl = {OpCtrl, 
`MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + Tests = {Tests, f32fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported + if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested + Tests = {Tests, f16rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f16rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 
0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; 
i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + Tests = {Tests, f16fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (P.IDIV_ON_FPU |1'b1) begin // drsu tests. NOTE(review): `|1'b1` makes this condition always true + if (P.Q_SUPPORTED) begin // quad-precision (f128) drsu vectors + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (P.D_SUPPORTED) begin // double-precision (f64) drsu vectors + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (P.F_SUPPORTED) begin // single-precision (f32) drsu vectors: gated on F_SUPPORTED like the other single-precision tests in this file + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, 
`DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + + end + if (P.ZFH_SUPPORTED) begin // half-precision (f16) drsu vectors + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (P.XLEN == 64 & P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int64rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int64div}; + OpCtrl = {OpCtrl, `INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int64remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = 
{WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int64divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested + Tests = {Tests, int64remw}; + OpCtrl = {OpCtrl, `INTREMW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested + Tests = {Tests, int64remuw}; + OpCtrl = {OpCtrl, `INTREMUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested + Tests = {Tests, int64divw}; + OpCtrl = {OpCtrl, `INTDIVW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested + Tests = {Tests, int64divuw}; + OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + // RV32 + else if (P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int32rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int32div}; + OpCtrl = {OpCtrl, 
`INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int32remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int32divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + // check if nothing is being tested + + $display("This simulation for TEST contains %d vectors", Tests.size); + if (Tests.size() == 0) begin + $display("TEST %s not supported in this configuration", TEST); + $stop; + end + end + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||||| |||||||| ||||||||| ||||||| ||||||||| |||||||| ||||||| ||||||||| + // ||| ||| ||| ||| ||| || || ||| ||| ||| ||| + // |||||||| |||||||| ||||||||| || || ||| |||||||| ||||||| ||| + // ||| || ||| ||| ||| || || ||| ||| ||| ||| + // ||| ||| |||||||| ||| ||| ||||||| ||| |||||||| ||||||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // Read the first test + initial begin + //string testname = {`PATH, Tests[TestNum]}; + static string pp = `PATH; + string testname; + string tt0; + tt0 = $psprintf("%s", Tests[TestNum]); + testname = {pp, tt0}; + //$display("Here you are %s", testname); + $display("\n\nRunning %s vectors ", Tests[TestNum]); + $readmemh(testname, TestVectors); + + // set the test index to 0 + TestNum = 0; + end + + // set the signals for all tests + always_comb UnitVal = Unit[TestNum]; + always_comb FmtVal = Fmt[TestNum]; + always_comb OpCtrlVal = OpCtrl[OpCtrlNum]; + always_comb 
WriteIntVal = WriteInt[OpCtrlNum]; + always_comb FrmVal = Frm[FrmNum]; + + // modify the format signal if only 2 precisions supported + // - 1 for the larger precision + // - 0 for the smaller precision + always_comb begin + if (P.FMTBITS == 1) ModFmt = FmtVal == P.FMT; + else ModFmt = FmtVal; + end + + // extract the inputs (X, Y, Z, SrcA, SrcB) and the output (Ans, AnsFlg) from the current test vector + readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), + .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB, + .Xs, .Ys, .Zs, .Unit(UnitVal), + .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), + .Xm, .Ym, .Zm, + .XNaN, .YNaN, .ZNaN, + .XSNaN, .YSNaN, .ZSNaN, + .XSubnorm, .ZSubnorm, + .XZero, .YZero, .ZZero, + .XInf, .YInf, .ZInf, .XExpMax, .Funct3E, .W64, + .X, .Y, .Z, .XPostBox); + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||||||| ||||||||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // instantiate devices under test + if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma + fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), + .Xe(Xe), .Ye(Ye), .Ze(Ze), + .Xm(Xm), .Ym(Ym), .Zm(Zm), + .XZero, .YZero, .ZZero, .Ss, .Se, + .OpCtrl(OpCtrlVal[2:0]), .Sm, .InvA, .SCnt, .As, .Ps, + .ASticky); + end + + if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt + fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .OpCtrl(OpCtrlVal[2:0]), .IntZero, + .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), + .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); + end + + if (TEST === "cmp" | TEST === "all") begin: fcmp + fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .Xs, .Ys, .Xe, .Ye, + .Xm, .Ym, 
.XZero, .YZero, .CmpIntRes(CmpRes), + .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); + end + + if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt + fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), + .XNaNE(XNaN), .YNaNE(YNaN), + .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0), + .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp), + .UmM(Quot), + .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M), + .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE)); + end + if (TEST === "fdivremsqrt" | TEST === "div_drsu" | TEST === "sqrt_drsu" | TEST === "intdivrem" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" ) begin: divremsqrt + drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]), + .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), + .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64), + .StallM(1'b0), .FDivBusyE, + .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M), + .Funct3E(Funct3E), .IntDivE(IntDivE), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg)); + end + else begin: postprocess + postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .OpCtrl(OpCtrlVal[2:0]), .DivUm(Quot), .DivUe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), 
.FmaSs(Ss), + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), + .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE), + .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), + .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, + .FmaASticky(ASticky), .FmaSe(Se), + .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); + end + + assign CmpFlg[3:0] = 0; + + // produce clock + always begin + clk = 1; #5; clk = 0; #5; + end + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||| ||| |||||||||| ||||| ||| + // ||||||| ||| ||| ||| ||||||| ||| + // |||| ||| ||| |||||||||| |||| ||| ||| + // |||| ||| ||| ||| ||| |||| ||| ||| + // |||| ||| ||| ||| ||| |||| ||| ||| + // |||| |||||| ||| ||| |||| |||||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // Check if the correct answer and result is a NaN + always_comb begin + if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin + // an integer output can't be a NaN + AnsNaN = 1'b0; + ResNaN = 1'b0; + end + else if (UnitVal === `CVTFPUNIT) begin + case (OpCtrlVal[1:0]) + 2'b11: begin // quad + AnsNaN = &Ans[P.Q_LEN-2:P.NF]&(|Ans[P.Q_NF-1:0]); + ResNaN = &Res[P.Q_LEN-2:P.NF]&(|Res[P.Q_NF-1:0]); + end + 2'b01: begin // double + AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]); + ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]); + end + 2'b00: begin // single + AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]); + ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]); + end + 2'b10: begin // half + AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); + ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); + end + endcase + end + else begin + case (FmtVal) + 2'b11: begin // quad + AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]); + ResNaN = 
&Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]); + end + 2'b01: begin // double + AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]); + ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]); + end + 2'b00: begin // single + AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]); + ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]); + end + 2'b10: begin // half + AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); + ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); + end + endcase + end + end + + always_comb begin + // select the result to check + case (UnitVal) + `FMAUNIT: Res = FpRes; + `DIVUNIT: Res = FpRes; + `CMPUNIT: Res = CmpRes; + `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; + `CVTFPUNIT: Res = FpRes; + `INTDIVUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; + endcase + + // select the flag to check + case (UnitVal) + `FMAUNIT: ResFlg = Flg; + `DIVUNIT: ResFlg = Flg; + `CMPUNIT: ResFlg = CmpFlg; + `CVTINTUNIT: ResFlg = Flg; + `CVTFPUNIT: ResFlg = Flg; + `INTDIVUNIT: ResFlg = Flg; + endcase + + // Use four state test sequence to handle div properly. + // Four states should allow other operations to finish + // properly and within time. 
+ case (state) + S0: begin + DivStart = 1'b0; + nextstate = Start; + end + Start: begin + if (UnitVal == `DIVUNIT | (UnitVal == `INTDIVUNIT & (OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL))) begin + DivStart = 1'b1; + IntDivE = 1'b0; + end + else if (UnitVal == `INTDIVUNIT) begin + IDivStart = 1'b1; + IntDivE = 1'b1; + end + else + DivStart = 1'b0; + nextstate = S2; + end + S2: begin + DivStart = 1'b0; + IDivStart = 1'b0; + if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT | UnitVal == `INTDIVUNIT)) + nextstate = S2; + else + nextstate = Done; + end + Done: begin + DivStart = 1'b0; + IDivStart = 1'b0; + IntDivE = 1'b0; + nextstate = S0; + end + endcase // case (state) + + end + + // Provide reset for divsqrt to reset state + initial + begin + #0 reset = 1'b1; + #25 reset = 1'b0; + end + + // Left-over from before - will remove soon (NOTE: despite its name, OldFDivBusyE samples FDivDoneE) + always @(posedge clk) + OldFDivBusyE = FDivDoneE; + + // state machine to handle timing for testing due to + // various cycle counts for different fp/int operations + // Adds vector at start of clock + always @(posedge clk) begin + + // state machine element for testing + if (reset) + state <= S0; + else + state <= nextstate; + + // Increment the vector when Done with each test + if (state == Done) + VectorNum += 1; // increment the vector + + end + + // check results on falling edge of clk + always @(negedge clk) begin + // check if the NaN value is good. 
IEEE754-2019 sections 6.3 and 6.2.3 specify: + // - the sign of the NaN does not matter for the operations being tested + // - when 2 or more NaNs are input the NaN that is propagated doesn't matter + if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) + case (FmtVal) + 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | + (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})) | + (ZNaN&(Res[P.Q_LEN-2:0] === {Z[P.Q_LEN-2:P.Q_NF],1'b1,Z[P.Q_NF-2:0]}))); + 2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | + (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})) | + (ZNaN&(Res[P.D_LEN-2:0] === {Z[P.D_LEN-2:P.D_NF],1'b1,Z[P.D_NF-2:0]}))); + 2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | + (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})) | + (ZNaN&(Res[P.S_LEN-2:0] === {Z[P.S_LEN-2:P.S_NF],1'b1,Z[P.S_NF-2:0]}))); + 2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | + (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) | + (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]}))); + endcase + else if (UnitVal === `CVTFPUNIT) // if converting from FP to FP OpCtrl 
contains the final FP format + case (OpCtrlVal[1:0]) + 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | + (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | + (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]}))); + 2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.D_LEN-2:0] === Ans[P.D_LEN-2:0])) | + (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | + (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]}))); + 2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.S_LEN-2:0] === Ans[P.S_LEN-2:0])) | + (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | + (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]}))); + 2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | + (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | + (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]}))); + endcase + else NaNGood = 1'b0; // integers can't be NaNs + + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||||| ||||||| ||| |||||| + // ||| ||| ||| ||| ||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + + 
/////////////////////////////////////////////////////////////////////////////////////////////// + + // check if result is correct + assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); + assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); + assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal ==`INTDIVU_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTREMW_OPCTRL) | (OpCtrlVal == `INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) ; + assign FMAop = (OpCtrlVal == `FMAUNIT); + assign DivDone = OldFDivBusyE & ~FDivBusyE; + //assign DivDone = ~FDivBusyE; + //assign DivDone = FDivDoneE; + assign CheckNow = ((DivDone | ~divsqrtop) | + (TEST == "add" | TEST == "fma" | TEST == "sub") | + ((TEST == "all") & (DivDone | ~divsqrtop))); + + if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin + errors += 1; + $display("\nError in %s", Tests[TestNum]); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[OpCtrlNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $stop; + end + + if (TestVectors[VectorNum][100:0] === 101'bx & Tests[TestNum] !== "" ) begin // if reached the eof + // increment the test + TestNum += 1; + // clear the vectors + for(int i=0; i quad + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)]; + Ans = TestVector[8+(P.Q_LEN-1):8]; + end + 2'b10: begin // int -> quad + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]}; + Ans = TestVector[8+(P.Q_LEN-1):8]; + end + 2'b01: begin // quad -> long + X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // quad -> int + X = 
{TestVector[8+32+P.Q_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b01: if (P.D_SUPPORTED) begin // double + // {Int->Fp?, is the integer a long} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> double + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)]; + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b10: begin // int -> double + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]}; + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b01: begin // double -> long + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // double -> int + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b00: if (P.S_SUPPORTED) begin // single + // {Int->Fp?, is the integer a long} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> single + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)]; + Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; + end + 2'b10: begin // int -> single + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]}; + Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; + end + 2'b01: begin // single -> long + X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // single -> int + X = {{P.FLEN-P.S_LEN{1'b1}}, 
TestVector[8+32+P.S_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b10: begin // half + // {Int->Fp?, is the integer a long} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> half + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)]; + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b10: begin // int -> half + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]}; + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b01: begin // half -> long + X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // half -> int + X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]}; + end + endcase + end + endcase + endcase + end + + assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL)); + assign ZEn = (Unit == `FMAUNIT); + assign FPUActive = 1'b1; + + unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, + .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, + .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, + .XEn, .YEn, .ZEn, .XExpMax, .XPostBox); + +endmodule diff --git a/testbench/testbench.sv b/testbench/testbench.sv index d214ef3b6..f91bdcc67 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -762,7 +762,7 @@ end void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VENDOR, "riscv.ovpworld.org")); void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_NAME, "riscv")); 
void'(rvviRefConfigSetString(IDV_CONFIG_MODEL_VARIANT, "RV64GCK")); - void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, 56)); + void'(rvviRefConfigSetInt(IDV_CONFIG_MODEL_ADDRESS_BUS_WIDTH, XLEN==64 ? 56 : 34)); void'(rvviRefConfigSetInt(IDV_CONFIG_MAX_NET_LATENCY_RETIREMENTS, 6)); if(elffilename == "buildroot") filename = ""; @@ -824,15 +824,25 @@ end void'(rvviRefCsrSetVolatile(0, 32'hC02)); // INSTRET void'(rvviRefCsrSetVolatile(0, 32'hB02)); // MINSTRET void'(rvviRefCsrSetVolatile(0, 32'hC01)); // TIME - + if (P.XLEN == 32) begin + void'(rvviRefCsrSetVolatile(0, 32'hC80)); // CYCLEH + void'(rvviRefCsrSetVolatile(0, 32'hB80)); // MCYCLEH + void'(rvviRefCsrSetVolatile(0, 32'hC82)); // INSTRETH + void'(rvviRefCsrSetVolatile(0, 32'hB82)); // MINSTRETH + void'(rvviRefCsrSetVolatile(0, 32'hC81)); // TIMEH + end // User HPMCOUNTER3 - HPMCOUNTER31 for (iter='hC03; iter<='hC1F; iter++) begin void'(rvviRefCsrSetVolatile(0, iter)); // HPMCOUNTERx + if (P.XLEN == 32) + void'(rvviRefCsrSetVolatile(0, iter+128)); // HPMCOUNTERxH end // Machine MHPMCOUNTER3 - MHPMCOUNTER31 for (iter='hB03; iter<='hB1F; iter++) begin void'(rvviRefCsrSetVolatile(0, iter)); // MHPMCOUNTERx + if (P.XLEN == 32) + void'(rvviRefCsrSetVolatile(0, iter+128)); // MHPMCOUNTERxH end // cannot predict this register due to latency between diff --git a/testbench/testbench_fp.sv b/testbench/testbench_fp.sv index 61fa12fcc..1617d392c 100644 --- a/testbench/testbench_fp.sv +++ b/testbench/testbench_fp.sv @@ -23,26 +23,28 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "config.vh" -`include "tests_fp.vh" +`include "tests-fp.vh" import cvw::*; module testbench_fp; // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir // to run specific precisions (e.g., quad or all) - parameter string TEST="none"; // choices are cvtint, cvtfp, cmp, add, sub, mul, div, sqrt, fma; all does not check properly - parameter string TEST_SIZE="all"; + 
parameter TEST="none"; + parameter TEST_SIZE="none"; `include "parameter-defs.vh" - parameter MAXVECTORS = 8388610; + //parameter MAXVECTORS = 8388610; + parameter MAXVECTORS = 100000; // FIXME: needs cleaning of unused variables (jes) string Tests[]; // list of tests to be run - logic [2:0] OpCtrl[]; // list of op controls + logic [3:0] OpCtrl[]; // list of op controls logic [2:0] Unit[]; // list of units being tested logic WriteInt[]; // Is being written to integer resgiter logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 + //logic [2:0] Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES logic [1:0] Fmt[]; // list of formats for the other units logic clk=0; @@ -51,22 +53,23 @@ module testbench_fp; logic [31:0] errors=0; // how many errors logic [31:0] VectorNum=0; // index for test vector logic [31:0] FrmNum=0; // index for rounding mode - logic [P.Q_LEN*4+7:0] TestVectors[MAXVECTORS-1:0]; // list of test vectors + logic [P.Q_LEN*4+7:0] TestVectors[MAXVECTORS:0]; // list of test vectors logic [1:0] FmtVal; // value of the current Fmt - logic [2:0] UnitVal, OpCtrlVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal + logic [2:0] UnitVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal + logic [3:0] OpCtrlVal; logic WriteIntVal; // value of the current WriteInt - logic [P.Q_LEN-1:0] X, Y, Z; // inputs read from TestFloat + logic [P.FLEN-1:0] X, Y, Z; // inputs read from TestFloat logic [P.FLEN-1:0] XPostBox; // inputs read from TestFloat - logic [P.XLEN-1:0] SrcA; // integer input - logic [P.Q_LEN-1:0] Ans; // correct answer from TestFloat - logic [P.Q_LEN-1:0] Res; // result from other units + logic [P.XLEN-1:0] SrcA, SrcB; // integer input + logic W64; // is W64 instruction + logic [P.FLEN-1:0] Ans; // correct answer from TestFloat + logic [P.FLEN-1:0] Res; // result from other units logic 
[4:0] AnsFlg; // correct flags read from testfloat logic [4:0] ResFlg, Flg; // Result flags logic [P.FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad logic [P.FLEN-1:0] FpRes, FpCmpRes; // Results from each unit logic [P.XLEN-1:0] IntRes, CmpRes; // Results from each unit - logic [P.Q_LEN-1:0] FpResExtended; // FpRes extended to same length as Ans/Res logic [4:0] FmaFlg, CvtFlg, DivFlg; // Outputed flags logic [4:0] CmpFlg; // Outputed flags logic AnsNaN, ResNaN, NaNGood; @@ -99,8 +102,8 @@ module testbench_fp; logic [P.NE+1:0] Se; logic ASticky; logic KillProd; - logic [$clog2(P.FMALEN+1)-1:0] SCnt; - logic [P.FMALEN-1:0] Sm; + logic [$clog2(3*P.NF+5)-1:0] SCnt; + logic [3*P.NF+3:0] Sm; logic InvA; logic NegSum; logic As; @@ -116,6 +119,7 @@ module testbench_fp; logic [2:0] Funct3M; logic FlushE; logic IFDivStartE; + logic IDivStart; logic FDivDoneE; logic [P.NE+1:0] UeM; logic [P.DIVb:0] UmM; @@ -124,9 +128,7 @@ module testbench_fp; logic FlagMatch; // Check if IEEE flags match logic CheckNow; // Final check logic FMAop; // Is this a FMA operation? - - logic [P.NE-2:0] BiasE; // Bias of exponent - logic [P.LOGFLEN-1:0] NfE; // Number of fractional bits + logic IntDivE; // Is Integer operation on FPU? // FSM for testing each item per clock typedef enum logic [2:0] {S0, Start, S2, Done} statetype; @@ -150,16 +152,16 @@ module testbench_fp; // sub - test subtraction // div - test division // sqrt - test square root - // all - test all of the above < doesn't report errors properly > - + // all - test all of the above + flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M)); + initial begin // Information displayed for user on what is simulating // $display("\nThe start of simulation..."); + $display("\nThe start of simulation... 
INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK); // $display("This simulation for TEST is %s", TEST); - // $display("This simulation for TEST is of the operand size of %s", TEST_SIZE); - - if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported - if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion + if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported + if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion // add the 128-bit cvtint tests to the to-be-tested list Tests = {Tests, f128rv32cvtint}; // add the op-codes for these tests to the op-code list @@ -177,13 +179,13 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; end end - end - // if the floating-point conversions are being tested - if (TEST === "cvtfp" | TEST === "all") begin + end + // if the floating-point conversions are being tested + if (TEST === "cvtfp" | TEST === "all") begin if (P.D_SUPPORTED) begin // if double precision is supported // add the 128 <-> 64 bit conversions to the to-be-tested list Tests = {Tests, f128f64cvt}; @@ -192,12 +194,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end end if (P.F_SUPPORTED) begin // if single precision is supported @@ -208,12 +210,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input 
format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end end if (P.ZFH_SUPPORTED) begin // if half precision is supported @@ -224,16 +226,16 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested + end + if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested // add the compare tests/op-ctrls/unit/fmt Tests = {Tests, f128cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -242,8 +244,8 @@ module testbench_fp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the addition tests/op-ctrls/unit/fmt Tests = {Tests, f128add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -252,8 +254,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested // add the subtraction tests/op-ctrls/unit/fmt Tests = {Tests, f128sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -262,8 +264,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin 
// if multiplication is being tested // add the multiply tests/op-ctrls/unit/fmt Tests = {Tests, f128mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -272,8 +274,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the divide tests/op-ctrls/unit/fmt Tests = {Tests, f128div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -282,8 +284,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested // add the square-root tests/op-ctrls/unit/fmt Tests = {Tests, f128sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -292,8 +294,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested Tests = {Tests, f128fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -301,10 +303,10 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end + end end if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported - if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested + if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested Tests = {Tests, f64rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -321,12 +323,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, 
`CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; end end - end - if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested if (P.F_SUPPORTED) begin // if single precision is supported // add the 64 <-> 32 bit conversions to the to-be-tested list Tests = {Tests, f64f32cvt}; @@ -335,12 +337,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end end if (P.ZFH_SUPPORTED) begin // if half precision is supported @@ -351,16 +353,16 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -369,8 +371,8 @@ module testbench_fp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64add}; OpCtrl = {OpCtrl, 
`ADD_OPCTRL}; @@ -379,8 +381,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -389,8 +391,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -399,8 +401,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -409,8 +411,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -419,8 +421,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested + end + if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested Tests = {Tests, f64fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -428,10 +430,10 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end + end end if 
(P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported - if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested Tests = {Tests, f32rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -448,12 +450,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; end end - end - if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested if (P.ZFH_SUPPORTED) begin // add the 32 <-> 16 bit conversions to the to-be-tested list Tests = {Tests, f32f16cvt}; @@ -462,16 +464,16 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -480,8 +482,8 @@ module testbench_fp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === 
"all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -490,8 +492,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -500,8 +502,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiply is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiply is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -510,8 +512,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -520,8 +522,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -530,8 +532,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "fma" | TEST === "all") begin // if fma is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f32fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -539,10 +541,10 @@ module 
testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end + end end if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported - if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested + if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested Tests = {Tests, f16rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -559,12 +561,12 @@ module testbench_fp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -573,8 +575,8 @@ module testbench_fp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -583,8 +585,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -593,8 +595,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "mul" 
| TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -603,8 +605,8 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -613,8 +615,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -623,8 +625,8 @@ module testbench_fp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "fma" | TEST === "all") begin // if fma is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f16fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -632,12 +634,194 @@ module testbench_fp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end + end + end + if (P.IDIV_ON_FPU |1'b1) begin + if (P.Q_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, 
f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (P.D_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (P.S_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + + end + if (P.ZFH_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fdivremsqrt" | TEST === 
"sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (P.XLEN == 64 & P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int64rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int64div}; + OpCtrl = {OpCtrl, `INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int64remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int64divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested + Tests = {Tests, int64remw}; + OpCtrl = {OpCtrl, `INTREMW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested + Tests = {Tests, int64remuw}; + OpCtrl = {OpCtrl, `INTREMUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, 
`INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested + Tests = {Tests, int64divw}; + OpCtrl = {OpCtrl, `INTDIVW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested + Tests = {Tests, int64divuw}; + OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + // RV32 + else if (P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int32rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int32div}; + OpCtrl = {OpCtrl, `INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int32remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int32divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end end // check if nothing is being tested + + $display("This simulation for TEST contains %d vectors", Tests.size); if (Tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; 
+ $display("TEST %s not supported in this configuration", TEST); + $stop; end end @@ -657,18 +841,17 @@ module testbench_fp; static string pp = `PATH; string testname; string tt0; - tt0 = $sformatf("%s", Tests[TestNum]); + tt0 = $psprintf("%s", Tests[TestNum]); testname = {pp, tt0}; //$display("Here you are %s", testname); - // clear the vectors - for(int i=0; i quad - X = {P.Q_LEN{1'bx}}; + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> quad + X = {P.FLEN{1'bx}}; SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)]; Ans = TestVector[8+(P.Q_LEN-1):8]; - end - 2'b10: begin // int -> quad + end + 2'b10: begin // int -> quad // correctly sign extend the integer depending on if it's a signed/unsigned test - X = {P.Q_LEN{1'bx}}; + X = {P.FLEN{1'bx}}; SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]}; Ans = TestVector[8+(P.Q_LEN-1):8]; - end - 2'b01: begin // quad -> long + end + 2'b01: begin // quad -> long X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]}; - end - 2'b00: begin // quad -> int + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // quad -> int X = {TestVector[8+32+P.Q_LEN-1:8+(32)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}},TestVector[8+(32-1):8]}; - end + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end endcase end 2'b01: if (P.D_SUPPORTED) begin // double // {Int->Fp?, is the integer a long} - casez ({OpCtrl[2:1]}) - 2'b11: begin // long -> double - X = {P.Q_LEN{1'bx}}; + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> double + X = {P.FLEN{1'bx}}; SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)]; - Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; - end - 2'b10: begin // int -> double + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b10: begin // int -> double // correctly sign extend the integer depending on if it's a signed/unsigned test - X = 
{P.Q_LEN{1'bx}}; + X = {P.FLEN{1'bx}}; SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]}; - Ans = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; - end - 2'b01: begin // double -> long - X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]}; + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b01: begin // double -> long + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]}; - end - 2'b00: begin // double -> int - X = {{P.Q_LEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // double -> int + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{P.Q_LEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; - end + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end endcase end - 2'b00: if (P.F_SUPPORTED) begin // single + 2'b00: if (P.S_SUPPORTED) begin // single // {is the integer a long, is the opperation to an integer} - casez ({OpCtrl[2:1]}) - 2'b11: begin // long -> single - X = {P.Q_LEN{1'bx}}; + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> single + X = {P.FLEN{1'bx}}; SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)]; - Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; - end - 2'b10: begin // int -> single + Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; + end + 2'b10: begin // int -> single // correctly sign extend the integer depending on if it's a signed/unsigned test - X = {P.Q_LEN{1'bx}}; + X = {P.FLEN{1'bx}}; SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]}; - Ans = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; - end - 2'b01: begin // single -> long - X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]}; + Ans = {{P.FLEN-P.S_LEN{1'b1}}, 
TestVector[8+(P.S_LEN-1):8]}; + end + 2'b01: begin // single -> long + X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]}; - end - 2'b00: begin // single -> int - X = {{P.Q_LEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // single -> int + X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+32+P.S_LEN-1:8+(32)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}},TestVector[8+(32-1):8]}; - end + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end endcase end 2'b10: begin // half // {is the integer a long, is the opperation to an integer} - casez ({OpCtrl[2:1]}) - 2'b11: begin // long -> half - X = {P.Q_LEN{1'bx}}; + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> half + X = {P.FLEN{1'bx}}; SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)]; - Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; - end - 2'b10: begin // int -> half + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b10: begin // int -> half // correctly sign extend the integer depending on if it's a signed/unsigned test - X = {P.Q_LEN{1'bx}}; + X = {P.FLEN{1'bx}}; SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]}; - Ans = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; - end - 2'b01: begin // half -> long - X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]}; + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b01: begin // half -> long + X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-64){1'b0}}, TestVector[8+(64-1):8]}; - end - 2'b00: begin // half -> int - X = {{P.Q_LEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // half -> int + X = {{P.FLEN-P.H_LEN{1'b1}}, 
TestVector[8+32+P.H_LEN-1:8+(32)]}; SrcA = {P.XLEN{1'bx}}; - Ans = {{(P.Q_LEN-32){TestVector[8+32-1]}}, TestVector[8+(32-1):8]}; - end + Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]}; + end endcase end endcase @@ -1382,13 +1686,13 @@ module readvectors import cvw::*; #(parameter cvw_t P) ( end assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); - assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0])); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL)); assign ZEn = (Unit == `FMAUNIT); assign FPUActive = 1'b1; - unpack #(P) unpack(.X(X[P.FLEN-1:0]), .Y(Y[P.FLEN-1:0]), .Z(Z[P.FLEN-1:0]), .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, + unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, - .XEn, .YEn, .ZEn, .XExpMax, .XPostBox, .Bias(BiasE), .Nf(NfE)); + .XEn, .YEn, .ZEn, .XExpMax, .XPostBox); endmodule diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh new file mode 100644 index 000000000..64babccd4 --- /dev/null +++ b/testbench/tests-fp.vh @@ -0,0 +1,639 @@ +////////////////////////////////////////// +// tests0fo.vh +// +// Written: Katherine Parry 2022 +// Modified: +// +// Purpose: List of floating-point tests to apply +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021-3 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`define PATH "../../tests/fp/vectors/" +`define ADD_OPCTRL 4'b0110 +`define MUL_OPCTRL 4'b0100 +`define SUB_OPCTRL 4'b0111 +`define FMA_OPCTRL 4'b0000 +`define DIV_OPCTRL 4'b0000 +`define SQRT_OPCTRL 4'b0001 +`define LE_OPCTRL 4'b0011 +`define LT_OPCTRL 4'b0001 +`define EQ_OPCTRL 4'b0010 +`define TO_UI_OPCTRL 4'b0000 +`define TO_I_OPCTRL 4'b0001 +`define TO_UL_OPCTRL 4'b0010 +`define TO_L_OPCTRL 4'b0011 +`define FROM_UI_OPCTRL 4'b0100 +`define FROM_I_OPCTRL 4'b0101 +`define FROM_UL_OPCTRL 4'b0110 +`define FROM_L_OPCTRL 4'b0111 +`define INTREMU_OPCTRL 4'b1001 +`define INTREM_OPCTRL 4'b1010 +`define INTDIV_OPCTRL 4'b1011 +`define INTDIVW_OPCTRL 4'b1100 +`define INTDIVU_OPCTRL 4'b1101 +`define INTREMW_OPCTRL 4'b1110 +`define INTREMUW_OPCTRL 4'b1111 +`define INTDIVUW_OPCTRL 4'b1000 +`define RNE 3'b000 +`define RZ 3'b001 +`define RU 3'b011 +`define RD 3'b010 +`define RNM 3'b100 +`define FMAUNIT 2 +`define DIVUNIT 1 +`define CVTINTUNIT 0 +`define CVTFPUNIT 4 +`define CMPUNIT 3 +`define DIVREMSQRTUNIT 5 +`define INTDIVUNIT 6 + +string f16rv32cvtint[] = '{ + "ui32_to_f16_rne.tv", + "ui32_to_f16_rz.tv", + "ui32_to_f16_ru.tv", + "ui32_to_f16_rd.tv", + "ui32_to_f16_rnm.tv", + "i32_to_f16_rne.tv", + "i32_to_f16_rz.tv", + "i32_to_f16_ru.tv", + "i32_to_f16_rd.tv", + "i32_to_f16_rnm.tv", + "f16_to_ui32_rne.tv", + "f16_to_ui32_rz.tv", + "f16_to_ui32_ru.tv", + "f16_to_ui32_rd.tv", + "f16_to_ui32_rnm.tv", + "f16_to_i32_rne.tv", + "f16_to_i32_rz.tv", + "f16_to_i32_ru.tv", + 
"f16_to_i32_rd.tv", + "f16_to_i32_rnm.tv" +}; + +string f16rv64cvtint[] = '{ + "ui64_to_f16_rne.tv", + "ui64_to_f16_rz.tv", + "ui64_to_f16_ru.tv", + "ui64_to_f16_rd.tv", + "ui64_to_f16_rnm.tv", + "i64_to_f16_rne.tv", + "i64_to_f16_rz.tv", + "i64_to_f16_ru.tv", + "i64_to_f16_rd.tv", + "i64_to_f16_rnm.tv", + "f16_to_ui64_rne.tv", + "f16_to_ui64_rz.tv", + "f16_to_ui64_ru.tv", + "f16_to_ui64_rd.tv", + "f16_to_ui64_rnm.tv", + "f16_to_i64_rne.tv", + "f16_to_i64_rz.tv", + "f16_to_i64_ru.tv", + "f16_to_i64_rd.tv", + "f16_to_i64_rnm.tv" +}; + +string f32rv32cvtint[] = '{ + "ui32_to_f32_rne.tv", + "ui32_to_f32_rz.tv", + "ui32_to_f32_ru.tv", + "ui32_to_f32_rd.tv", + "ui32_to_f32_rnm.tv", + "i32_to_f32_rne.tv", + "i32_to_f32_rz.tv", + "i32_to_f32_ru.tv", + "i32_to_f32_rd.tv", + "i32_to_f32_rnm.tv", + "f32_to_ui32_rne.tv", + "f32_to_ui32_rz.tv", + "f32_to_ui32_ru.tv", + "f32_to_ui32_rd.tv", + "f32_to_ui32_rnm.tv", + "f32_to_i32_rne.tv", + "f32_to_i32_rz.tv", + "f32_to_i32_ru.tv", + "f32_to_i32_rd.tv", + "f32_to_i32_rnm.tv" +}; + +string f32rv64cvtint[] = '{ + "ui64_to_f32_rne.tv", + "ui64_to_f32_rz.tv", + "ui64_to_f32_ru.tv", + "ui64_to_f32_rd.tv", + "ui64_to_f32_rnm.tv", + "i64_to_f32_rne.tv", + "i64_to_f32_rz.tv", + "i64_to_f32_ru.tv", + "i64_to_f32_rd.tv", + "i64_to_f32_rnm.tv", + "f32_to_ui64_rne.tv", + "f32_to_ui64_rz.tv", + "f32_to_ui64_ru.tv", + "f32_to_ui64_rd.tv", + "f32_to_ui64_rnm.tv", + "f32_to_i64_rne.tv", + "f32_to_i64_rz.tv", + "f32_to_i64_ru.tv", + "f32_to_i64_rd.tv", + "f32_to_i64_rnm.tv" +}; + + +string f64rv32cvtint[] = '{ + "ui32_to_f64_rne.tv", + "ui32_to_f64_rz.tv", + "ui32_to_f64_ru.tv", + "ui32_to_f64_rd.tv", + "ui32_to_f64_rnm.tv", + "i32_to_f64_rne.tv", + "i32_to_f64_rz.tv", + "i32_to_f64_ru.tv", + "i32_to_f64_rd.tv", + "i32_to_f64_rnm.tv", + "f64_to_ui32_rne.tv", + "f64_to_ui32_rz.tv", + "f64_to_ui32_ru.tv", + "f64_to_ui32_rd.tv", + "f64_to_ui32_rnm.tv", + "f64_to_i32_rne.tv", + "f64_to_i32_rz.tv", + "f64_to_i32_ru.tv", + "f64_to_i32_rd.tv", + 
"f64_to_i32_rnm.tv" +}; + +string f64rv64cvtint[] = '{ + "ui64_to_f64_rne.tv", + "ui64_to_f64_rz.tv", + "ui64_to_f64_ru.tv", + "ui64_to_f64_rd.tv", + "ui64_to_f64_rnm.tv", + "i64_to_f64_rne.tv", + "i64_to_f64_rz.tv", + "i64_to_f64_ru.tv", + "i64_to_f64_rd.tv", + "i64_to_f64_rnm.tv", + "f64_to_ui64_rne.tv", + "f64_to_ui64_rz.tv", + "f64_to_ui64_ru.tv", + "f64_to_ui64_rd.tv", + "f64_to_ui64_rnm.tv", + "f64_to_i64_rne.tv", + "f64_to_i64_rz.tv", + "f64_to_i64_ru.tv", + "f64_to_i64_rd.tv", + "f64_to_i64_rnm.tv" +}; + +string f128rv64cvtint[] = '{ + "ui64_to_f128_rne.tv", + "ui64_to_f128_rz.tv", + "ui64_to_f128_ru.tv", + "ui64_to_f128_rd.tv", + "ui64_to_f128_rnm.tv", + "i64_to_f128_rne.tv", + "i64_to_f128_rz.tv", + "i64_to_f128_ru.tv", + "i64_to_f128_rd.tv", + "i64_to_f128_rnm.tv", + "f128_to_ui64_rne.tv", + "f128_to_ui64_rz.tv", + "f128_to_ui64_ru.tv", + "f128_to_ui64_rd.tv", + "f128_to_ui64_rnm.tv", + "f128_to_i64_rne.tv", + "f128_to_i64_rz.tv", + "f128_to_i64_ru.tv", + "f128_to_i64_rd.tv", + "f128_to_i64_rnm.tv" +}; + +string f128rv32cvtint[] = '{ + "ui32_to_f128_rne.tv", + "ui32_to_f128_rz.tv", + "ui32_to_f128_ru.tv", + "ui32_to_f128_rd.tv", + "ui32_to_f128_rnm.tv", + "i32_to_f128_rne.tv", + "i32_to_f128_rz.tv", + "i32_to_f128_ru.tv", + "i32_to_f128_rd.tv", + "i32_to_f128_rnm.tv", + "f128_to_ui32_rne.tv", + "f128_to_ui32_rz.tv", + "f128_to_ui32_ru.tv", + "f128_to_ui32_rd.tv", + "f128_to_ui32_rnm.tv", + "f128_to_i32_rne.tv", + "f128_to_i32_rz.tv", + "f128_to_i32_ru.tv", + "f128_to_i32_rd.tv", + "f128_to_i32_rnm.tv" +}; + +string f32f16cvt[] = '{ + "f32_to_f16_rne.tv", + "f32_to_f16_rz.tv", + "f32_to_f16_ru.tv", + "f32_to_f16_rd.tv", + "f32_to_f16_rnm.tv", + "f16_to_f32_rne.tv", + "f16_to_f32_rz.tv", + "f16_to_f32_ru.tv", + "f16_to_f32_rd.tv", + "f16_to_f32_rnm.tv" +}; + +string f64f16cvt[] = '{ + "f64_to_f16_rne.tv", + "f64_to_f16_rz.tv", + "f64_to_f16_ru.tv", + "f64_to_f16_rd.tv", + "f64_to_f16_rnm.tv", + "f16_to_f64_rne.tv", + "f16_to_f64_rz.tv", + 
"f16_to_f64_ru.tv", + "f16_to_f64_rd.tv", + "f16_to_f64_rnm.tv" +}; + +string f128f16cvt[] = '{ + "f128_to_f16_rne.tv", + "f128_to_f16_rz.tv", + "f128_to_f16_ru.tv", + "f128_to_f16_rd.tv", + "f128_to_f16_rnm.tv", + "f16_to_f128_rne.tv", + "f16_to_f128_rz.tv", + "f16_to_f128_ru.tv", + "f16_to_f128_rd.tv", + "f16_to_f128_rnm.tv" +}; + +string f64f32cvt[] = '{ + "f64_to_f32_rne.tv", + "f64_to_f32_rz.tv", + "f64_to_f32_ru.tv", + "f64_to_f32_rd.tv", + "f64_to_f32_rnm.tv", + "f32_to_f64_rne.tv", + "f32_to_f64_rz.tv", + "f32_to_f64_ru.tv", + "f32_to_f64_rd.tv", + "f32_to_f64_rnm.tv" +}; + +string f128f32cvt[] = '{ + "f128_to_f32_rne.tv", + "f128_to_f32_rz.tv", + "f128_to_f32_ru.tv", + "f128_to_f32_rd.tv", + "f128_to_f32_rnm.tv", + "f32_to_f128_rne.tv", + "f32_to_f128_rz.tv", + "f32_to_f128_ru.tv", + "f32_to_f128_rd.tv", + "f32_to_f128_rnm.tv" +}; + +string f128f64cvt[] = '{ + "f128_to_f64_rne.tv", + "f128_to_f64_rz.tv", + "f128_to_f64_ru.tv", + "f128_to_f64_rd.tv", + "f128_to_f64_rnm.tv", + "f64_to_f128_rne.tv", + "f64_to_f128_rz.tv", + "f64_to_f128_ru.tv", + "f64_to_f128_rd.tv", + "f64_to_f128_rnm.tv" +}; + +string f16add[] = '{ + "f16_add_rne.tv", + "f16_add_rz.tv", + "f16_add_ru.tv", + "f16_add_rd.tv", + "f16_add_rnm.tv" +}; + +string f32add[] = '{ + "f32_add_rne.tv", + "f32_add_rz.tv", + "f32_add_ru.tv", + "f32_add_rd.tv", + "f32_add_rnm.tv" +}; + +string f64add[] = '{ + "f64_add_rne.tv", + "f64_add_rz.tv", + "f64_add_ru.tv", + "f64_add_rd.tv", + "f64_add_rnm.tv" +}; + +string f128add[] = '{ + "f128_add_rne.tv", + "f128_add_rz.tv", + "f128_add_ru.tv", + "f128_add_rd.tv", + "f128_add_rnm.tv" +}; + +string f16sub[] = '{ + "f16_sub_rne.tv", + "f16_sub_rz.tv", + "f16_sub_ru.tv", + "f16_sub_rd.tv", + "f16_sub_rnm.tv" +}; + +string f32sub[] = '{ + "f32_sub_rne.tv", + "f32_sub_rz.tv", + "f32_sub_ru.tv", + "f32_sub_rd.tv", + "f32_sub_rnm.tv" +}; + +string f64sub[] = '{ + "f64_sub_rne.tv", + "f64_sub_rz.tv", + "f64_sub_ru.tv", + "f64_sub_rd.tv", + "f64_sub_rnm.tv" +}; + 
+string f128sub[] = '{ + "f128_sub_rne.tv", + "f128_sub_rz.tv", + "f128_sub_ru.tv", + "f128_sub_rd.tv", + "f128_sub_rnm.tv" +}; + +string f16mul[] = '{ + "f16_mul_rne.tv", + "f16_mul_rz.tv", + "f16_mul_ru.tv", + "f16_mul_rd.tv", + "f16_mul_rnm.tv" +}; + +string f32mul[] = '{ + "f32_mul_rne.tv", + "f32_mul_rz.tv", + "f32_mul_ru.tv", + "f32_mul_rd.tv", + "f32_mul_rnm.tv" +}; + +string f64mul[] = '{ + "f64_mul_rne.tv", + "f64_mul_rz.tv", + "f64_mul_ru.tv", + "f64_mul_rd.tv", + "f64_mul_rnm.tv" +}; + +string f128mul[] = '{ + "f128_mul_rne.tv", + "f128_mul_rz.tv", + "f128_mul_ru.tv", + "f128_mul_rd.tv", + "f128_mul_rnm.tv" +}; + +string f16div[] = '{ + "f16_div_rne.tv", + "f16_div_rz.tv", + "f16_div_ru.tv", + "f16_div_rd.tv", + "f16_div_rnm.tv" +}; + +string f32div[] = '{ + "f32_div_rne.tv", + "f32_div_rz.tv", + "f32_div_ru.tv", + "f32_div_rd.tv", + "f32_div_rnm.tv" +}; + +string f64div[] = '{ + "f64_div_rne.tv", + "f64_div_rz.tv", + "f64_div_ru.tv", + "f64_div_rd.tv", + "f64_div_rnm.tv" +}; + +string f128div[] = '{ + "f128_div_rne.tv", + "f128_div_rz.tv", + "f128_div_ru.tv", + "f128_div_rd.tv", + "f128_div_rnm.tv" +}; + +string f16sqrt[] = '{ + "f16_sqrt_rne.tv", + "f16_sqrt_rz.tv", + "f16_sqrt_ru.tv", + "f16_sqrt_rd.tv", + "f16_sqrt_rnm.tv" +}; + +string f32sqrt[] = '{ + "f32_sqrt_rne.tv", + "f32_sqrt_rz.tv", + "f32_sqrt_ru.tv", + "f32_sqrt_rd.tv", + "f32_sqrt_rnm.tv" +}; + +string f64sqrt[] = '{ + "f64_sqrt_rne.tv", + "f64_sqrt_rz.tv", + "f64_sqrt_ru.tv", + "f64_sqrt_rd.tv", + "f64_sqrt_rnm.tv" +}; + +string f128sqrt[] = '{ + "f128_sqrt_rne.tv", + "f128_sqrt_rz.tv", + "f128_sqrt_ru.tv", + "f128_sqrt_rd.tv", + "f128_sqrt_rnm.tv" +}; + +string f16cmp[] = '{ + "f16_eq_rne.tv", + "f16_eq_rz.tv", + "f16_eq_ru.tv", + "f16_eq_rd.tv", + "f16_eq_rnm.tv", + "f16_le_rne.tv", + "f16_le_rz.tv", + "f16_le_ru.tv", + "f16_le_rd.tv", + "f16_le_rnm.tv", + "f16_lt_rne.tv", + "f16_lt_rz.tv", + "f16_lt_ru.tv", + "f16_lt_rd.tv", + "f16_lt_rnm.tv" +}; + +string f32cmp[] = '{ + 
"f32_eq_rne.tv", + "f32_eq_rz.tv", + "f32_eq_ru.tv", + "f32_eq_rd.tv", + "f32_eq_rnm.tv", + "f32_le_rne.tv", + "f32_le_rz.tv", + "f32_le_ru.tv", + "f32_le_rd.tv", + "f32_le_rnm.tv", + "f32_lt_rne.tv", + "f32_lt_rz.tv", + "f32_lt_ru.tv", + "f32_lt_rd.tv", + "f32_lt_rnm.tv" +}; + +string f64cmp[] = '{ + "f64_eq_rne.tv", + "f64_eq_rz.tv", + "f64_eq_ru.tv", + "f64_eq_rd.tv", + "f64_eq_rnm.tv", + "f64_le_rne.tv", + "f64_le_rz.tv", + "f64_le_ru.tv", + "f64_le_rd.tv", + "f64_le_rnm.tv", + "f64_lt_rne.tv", + "f64_lt_rz.tv", + "f64_lt_ru.tv", + "f64_lt_rd.tv", + "f64_lt_rnm.tv" +}; + +string f128cmp[] = '{ + "f128_eq_rne.tv", + "f128_eq_rz.tv", + "f128_eq_ru.tv", + "f128_eq_rd.tv", + "f128_eq_rnm.tv", + "f128_le_rne.tv", + "f128_le_rz.tv", + "f128_le_ru.tv", + "f128_le_rd.tv", + "f128_le_rnm.tv", + "f128_lt_rne.tv", + "f128_lt_rz.tv", + "f128_lt_ru.tv", + "f128_lt_rd.tv", + "f128_lt_rnm.tv" +}; + +string f16fma[] = '{ + "f16_mulAdd_rne.tv", + "f16_mulAdd_rz.tv", + "f16_mulAdd_ru.tv", + "f16_mulAdd_rd.tv", + "f16_mulAdd_rnm.tv" +}; + +string f32fma[] = '{ + "f32_mulAdd_rne.tv", + "f32_mulAdd_rz.tv", + "f32_mulAdd_ru.tv", + "f32_mulAdd_rd.tv", + "f32_mulAdd_rnm.tv" +}; + +string f64fma[] = '{ + "f64_mulAdd_rne.tv", + "f64_mulAdd_rz.tv", + "f64_mulAdd_ru.tv", + "f64_mulAdd_rd.tv", + "f64_mulAdd_rnm.tv" +}; + +string f128fma[] = '{ + "f128_mulAdd_rne.tv", + "f128_mulAdd_rz.tv", + "f128_mulAdd_ru.tv", + "f128_mulAdd_rd.tv", + "f128_mulAdd_rnm.tv" +}; + +string int64rem[] = '{ + "cvw_64_rem-01.tv" +}; + +string int64div[] = '{ + "cvw_64_div-01.tv" +}; + +string int64remu[] = '{ + "cvw_64_remu-01.tv" +}; + +string int64divu[] = '{ + "cvw_64_divu-01.tv" +}; + +string int64remw[] = '{ + "cvw_64_remw-01.tv" +}; + +string int64remuw[] = '{ + "cvw_64_remuw-01.tv" +}; + +string int64divuw[] = '{ + "cvw_64_divuw-01.tv" +}; + +string int64divw[] = '{ + "cvw_64_divw-01.tv" +}; + +string int32rem[] = '{ + "cvw_32_rem-01.tv" +}; + +string int32div[] = '{ + "cvw_32_div-01.tv" +}; + +string 
int32remu[] = '{ + "cvw_32_remu-01.tv" +}; + +string int32divu[] = '{ + "cvw_32_divu-01.tv" +}; diff --git a/tests/custom/spitest/Makefile b/tests/custom/spitest/Makefile new file mode 100644 index 000000000..34e83a9e4 --- /dev/null +++ b/tests/custom/spitest/Makefile @@ -0,0 +1,112 @@ +CEXT := c +CPPEXT := cpp +AEXT := s +SEXT := S +SRCEXT := \([$(CEXT)$(AEXT)$(SEXT)]\|$(CPPEXT)\) +OBJEXT := o +DEPEXT := d +SRCDIR := . +BUILDDIR := OBJ + +SOURCES ?= $(shell find $(SRCDIR) -type f -regex ".*\.$(SRCEXT)" | sort) +OBJECTS := $(SOURCES:.$(CEXT)=.$(OBJEXT)) +OBJECTS := $(OBJECTS:.$(AEXT)=.$(OBJEXT)) +OBJECTS := $(OBJECTS:.$(SEXT)=.$(OBJEXT)) +OBJECTS := $(OBJECTS:.$(CPPEXT)=.$(OBJEXT)) +OBJECTS := $(patsubst $(SRCDIR)/%,$(BUILDDIR)/%,$(OBJECTS)) + +TARGETDIR := bin +TARGET := $(TARGETDIR)/spitest.elf +ROOT := .. +LIBRARY_DIRS := +LIBRARY_FILES := + +MARCH :=-march=rv64imfdc +MABI :=-mabi=lp64d +LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles +LINKER :=$(ROOT)/linker8000-0000.x + + +AFLAGS =$(MARCH) $(MABI) -W +CFLAGS =$(MARCH) $(MABI) -mcmodel=medany -O2 +AS=riscv64-unknown-elf-as +CC=riscv64-unknown-elf-gcc +AR=riscv64-unknown-elf-ar + + +#Default Make +all: directories $(TARGET).memfile + +#Remake +remake: clean all + +#Make the Directories +directories: + @mkdir -p $(TARGETDIR) + @mkdir -p $(BUILDDIR) + +clean: + rm -rf $(BUILDDIR) $(TARGETDIR) *.memfile *.objdump + + +#Needed for building additional library projects (skipped while LIBRARY_DIRS is empty) +ifdef LIBRARY_DIRS +LIBS+=${LIBRARY_DIRS:%=-L%} ${LIBRARY_FILES:%=-l%} +INC+=${LIBRARY_DIRS:%=-I%} + +${LIBRARY_DIRS}: + $(MAKE) -C $@ -j 1 + +.PHONY: $(LIBRARY_DIRS) $(TARGET) +endif + + +#Pull in dependency info for *existing* .o files +-include $(OBJECTS:.$(OBJEXT)=.$(DEPEXT)) + +#Link +$(TARGET): $(OBJECTS) $(LIBRARY_DIRS) + $(CC) $(LINK_FLAGS) -g -o $(TARGET) $(OBJECTS) ${LIBS} -T ${LINKER} + + +#Compile C sources (compiler stdout is captured in a .list file next to the object) +$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CEXT) + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list + @$(CC) $(CFLAGS) $(INC) 
-MM $(SRCDIR)/$*.$(CEXT) > $(BUILDDIR)/$*.$(DEPEXT) + @cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp + @sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT) + @sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT) + @rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp + +# gcc won't output dependencies for assembly files for some reason, so both asm rules echo a minimal "object: source" dependency instead. +# Most asm files don't have other dependencies, so the echo will work for now. +$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(AEXT) + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) -c -o $@ $< > $(BUILDDIR)/$*.list + @echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT) + +$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(SEXT) + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list + @echo $@: $< > $(BUILDDIR)/$*.$(DEPEXT) + +# C++ (built with the same compiler driver and the same dependency post-processing as the C rule) +$(BUILDDIR)/%.$(OBJEXT): $(SRCDIR)/%.$(CPPEXT) + @mkdir -p $(dir $@) + $(CC) $(CFLAGS) $(INC) -c -o $@ $< > $(BUILDDIR)/$*.list + @$(CC) $(CFLAGS) $(INC) -MM $(SRCDIR)/$*.$(CPPEXT) > $(BUILDDIR)/$*.$(DEPEXT) + @cp -f $(BUILDDIR)/$*.$(DEPEXT) $(BUILDDIR)/$*.$(DEPEXT).tmp + @sed -e 's|.*:|$(BUILDDIR)/$*.$(OBJEXT):|' < $(BUILDDIR)/$*.$(DEPEXT).tmp > $(BUILDDIR)/$*.$(DEPEXT) + @sed -e 's/.*://' -e 's/\\$$//' < $(BUILDDIR)/$*.$(DEPEXT).tmp | fmt -1 | sed -e 's/^ *//' -e 's/$$/:/' >> $(BUILDDIR)/$*.$(DEPEXT) + @rm -f $(BUILDDIR)/$*.$(DEPEXT).tmp + +# Write an objdump listing of the ELF, then convert the ELF to a hex memory file +$(TARGET).memfile: $(TARGET) + @echo 'Making object dump file.' 
+ @riscv64-unknown-elf-objdump -D $< > $<.objdump + @echo 'Making memory file' + riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@ + extractFunctionRadix.sh $<.objdump + mkdir -p ../work/ + cp -f $(TARGETDIR)/* ../work/ diff --git a/tests/custom/spitest/spi.h b/tests/custom/spitest/spi.h new file mode 100644 index 000000000..2b1d541da --- /dev/null +++ b/tests/custom/spitest/spi.h @@ -0,0 +1,116 @@ +/////////////////////////////////////////////////////////////////////// +// spi.h +// +// Written: Jacob Pease jacob.pease@okstate.edu 7/22/2024 +// +// Purpose: Header file for interfacing with the SPI peripheral +// +// +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the +// “License”); you may not use this file except in compliance with the +// License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+/////////////////////////////////////////////////////////////////////// + +#pragma once +#ifndef SPI_HEADER +#define SPI_HEADER + +#include <stdint.h> + +#define SPI_BASE 0x13000 /* Base address of SPI device used for SDC */ + +/* register addresses (base + offset); parenthesized so they expand safely inside larger expressions */ +#define SPI_SCKDIV (SPI_BASE + 0x00) /* Serial clock divisor */ +#define SPI_SCKMODE (SPI_BASE + 0x04) /* Serial clock mode */ +#define SPI_CSID (SPI_BASE + 0x10) /* Chip select ID */ +#define SPI_CSDEF (SPI_BASE + 0x14) /* Chip select default */ +#define SPI_CSMODE (SPI_BASE + 0x18) /* Chip select mode */ +#define SPI_DELAY0 (SPI_BASE + 0x28) /* Delay control 0 */ +#define SPI_DELAY1 (SPI_BASE + 0x2c) /* Delay control 1 */ +#define SPI_FMT (SPI_BASE + 0x40) /* Frame format */ +#define SPI_TXDATA (SPI_BASE + 0x48) /* Tx FIFO data */ +#define SPI_RXDATA (SPI_BASE + 0x4c) /* Rx FIFO data */ +#define SPI_TXMARK (SPI_BASE + 0x50) /* Tx FIFO watermark */ +#define SPI_RXMARK (SPI_BASE + 0x54) /* Rx FIFO watermark */ + +/* Non-implemented +#define SPI_FCTRL (SPI_BASE + 0x60) // SPI flash interface control +#define SPI_FFMT (SPI_BASE + 0x64) // SPI flash instruction format +*/ +#define SPI_IE (SPI_BASE + 0x70) /* Interrupt Enable Register */ +#define SPI_IP (SPI_BASE + 0x74) /* Interrupt Pending Register */ + +/* delay0 bits */ +#define SIFIVE_SPI_DELAY0_CSSCK(x) ((uint32_t)(x)) +#define SIFIVE_SPI_DELAY0_CSSCK_MASK 0xffU +#define SIFIVE_SPI_DELAY0_SCKCS(x) ((uint32_t)(x) << 16) +#define SIFIVE_SPI_DELAY0_SCKCS_MASK (0xffU << 16) + +/* delay1 bits */ +#define SIFIVE_SPI_DELAY1_INTERCS(x) ((uint32_t)(x)) +#define SIFIVE_SPI_DELAY1_INTERCS_MASK 0xffU +#define SIFIVE_SPI_DELAY1_INTERXFR(x) ((uint32_t)(x) << 16) +#define SIFIVE_SPI_DELAY1_INTERXFR_MASK (0xffU << 16) + +/* csmode bits */ +#define SIFIVE_SPI_CSMODE_MODE_AUTO 0U +#define SIFIVE_SPI_CSMODE_MODE_HOLD 2U +#define SIFIVE_SPI_CSMODE_MODE_OFF 3U + +// inline void write_reg(uintptr_t addr, uint32_t value); +//inline uint32_t read_reg(uintptr_t addr); +//inline void spi_sendbyte(uint8_t byte); +//inline 
void waittx(); +//inline void waitrx(); +uint8_t spi_txrx(uint8_t byte); +uint8_t spi_dummy(); +//inline uint8_t spi_readbyte(); +//uint64_t spi_read64(); +void spi_init(uint32_t clkin); +void spi_set_clock(uint32_t clkin, uint32_t clkout); + +static inline void write_reg(uintptr_t addr, uint32_t value) { // 32-bit volatile store to the register at addr + volatile uint32_t * loc = (volatile uint32_t *) addr; + *loc = value; +} + +// Read a register (32-bit volatile load from addr) +static inline uint32_t read_reg(uintptr_t addr) { + return *(volatile uint32_t *) addr; +} + +// Queues a single byte in the transfer fifo +static inline void spi_sendbyte(uint8_t byte) { + // Write byte to transfer fifo + write_reg(SPI_TXDATA, byte); +} + +static inline void waittx() { // Spin until bit 0 of SPI_IP (tx watermark pending) is set + while(!(read_reg(SPI_IP) & 1)) {} +} + +static inline void waitrx() { // Spin until bit 1 of SPI_IP (rx watermark pending) is set, i.e. rx data is available + while(!(read_reg(SPI_IP) & 2)) {} +} + +static inline uint8_t spi_readbyte() { // Returns the low 8 bits read from SPI_RXDATA + return read_reg(SPI_RXDATA); +} + +#endif diff --git a/tests/custom/spitest/spitest.c b/tests/custom/spitest/spitest.c new file mode 100644 index 000000000..23d408c16 --- /dev/null +++ b/tests/custom/spitest/spitest.c @@ -0,0 +1,107 @@ +/////////////////////////////////////////////////////////////////////// +// spitest.c +// +// Written: Jacob Pease jacob.pease@okstate.edu 8/27/2024 +// +// Purpose: C code to test SPI bugs +// +// +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the +// “License”); you may not use this file except in compliance with the +// License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. 
See the License for the specific language governing +// permissions and limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "spi.h" + +// Testing SPI peripheral in loopback mode +// TODO: Need to make sure the configuration I'm using uses loopback +// mode. This can be specified in derivlists.txt +// TODO: + +uint8_t spi_txrx(uint8_t byte) { // Send one byte and return the byte received in exchange + spi_sendbyte(byte); + while (!(read_reg(SPI_IP) & 2)) {} // wait for the rx watermark (same poll main uses) so RXDATA holds the reply before it is read + return spi_readbyte(); +} + +uint8_t spi_dummy() { // Clock out 0xff and return whatever byte comes back + return spi_txrx(0xff); +} + +void spi_set_clock(uint32_t clkin, uint32_t clkout) { // Program SCKDIV so that clkout ~= clkin / (2 * (div + 1)) + uint32_t div = (clkin/(2*clkout) > 0) ? (clkin/(2*clkout)) - 1 : 0; // clamp to 0 instead of wrapping to 0xFFFFFFFF when clkin < 2*clkout + write_reg(SPI_SCKDIV, div); +} + +// Initialize Sifive FU540 based SPI Controller +void spi_init(uint32_t clkin) { // NOTE: clkin is currently unused; SCKDIV is set to a fixed value below + // Enable interrupts + write_reg(SPI_IE, 0x3); + + // Set TXMARK to 1. If the number of entries is < 1 + // IP's txwm field will go high. + // Set RXMARK to 0. If the number of entries is > 0 + // IP's rxwm field will go high. + write_reg(SPI_TXMARK, 1); + write_reg(SPI_RXMARK, 0); + + // Set Delay 0 to default + write_reg(SPI_DELAY0, + SIFIVE_SPI_DELAY0_CSSCK(1) | + SIFIVE_SPI_DELAY0_SCKCS(1)); + + // Set Delay 1 to default + write_reg(SPI_DELAY1, + SIFIVE_SPI_DELAY1_INTERCS(1) | + SIFIVE_SPI_DELAY1_INTERXFR(0)); + + // Initialize the SPI controller clock divisor to a fixed 0x18 (24), + // i.e. div = (20MHz/(2*400kHz)) - 1; this does not depend on clkin + write_reg(SPI_SCKDIV, 0x18); +} + +void main() { + spi_init(100000000); + + spi_set_clock(100000000,50000000); + + volatile uint8_t *p = (volatile uint8_t *)(0x8F000000); + int j; + uint64_t n = 0; + + write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_HOLD); + //n = 512/8; + + n = 4; + do { + // Send 8 dummy bytes (fifo should be empty) + for (j = 0; j < 8; j++) { + spi_sendbyte(0xaa + j); + } + + // Reset counter. Process bytes AS THEY COME IN. 
+ for (j = 0; j < 8; j++) { + while (!(read_reg(SPI_IP) & 2)) {} + uint8_t x = spi_readbyte(); + *p++ = x; + } + } while(--n > 0); + + write_reg(SPI_CSMODE, SIFIVE_SPI_CSMODE_MODE_AUTO); +} diff --git a/tests/custom/spitest/start.s b/tests/custom/spitest/start.s new file mode 100644 index 000000000..57f66ce79 --- /dev/null +++ b/tests/custom/spitest/start.s @@ -0,0 +1,59 @@ +.section .init +.global _start +.type _start, @function + +_start: + # Initialize global pointer + .option push + .option norelax + 1:auipc gp, %pcrel_hi(__global_pointer$) + addi gp, gp, %pcrel_lo(1b) + .option pop + + li x1, 0 + li x2, 0 + li x4, 0 + li x5, 0 + li x6, 0 + li x7, 0 + li x8, 0 + li x9, 0 + li x10, 0 + li x11, 0 + li x12, 0 + li x13, 0 + li x14, 0 + li x15, 0 + li x16, 0 + li x17, 0 + li x18, 0 + li x19, 0 + li x20, 0 + li x21, 0 + li x22, 0 + li x23, 0 + li x24, 0 + li x25, 0 + li x26, 0 + li x27, 0 + li x28, 0 + li x29, 0 + li x30, 0 + li x31, 0 + + + + # set the stack pointer to the top of memory - 16 bytes (the RISC-V ABI requires a 16-byte aligned sp) + li sp, 0x87FFFFF0 + + jal ra, main + jal ra, _halt + +.section .text +.global _halt +.type _halt, @function +_halt: + li gp, 1 + li a0, 0 + ecall + j _halt diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh index 707b2d5f4..7fe5897fb 100755 --- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh +++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh @@ -1,5 +1,7 @@ #!/bin/sh # create test vectors for stand alone int +mkdir -p IF_vectors ./extract_testfloat_vectors.py ./extract_arch_vectors.py +cp IF_vectors/* ../vectors