From fda6305d1c581a08b147650b409c087a7c89937e Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Tue, 27 Aug 2024 17:07:35 -0700 Subject: [PATCH] began porting over divremsqrt --- bin/regression-wally-intdiv | 561 ++++++ config/shared/config-shared.vh | 4 + config/shared/parameter-defs.vh | 2 + src/cvw.sv | 2 + src/fpu/divremsqrt/arithrightshift.sv | 9 + src/fpu/divremsqrt/divremsqrt.sv | 111 ++ src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv | 73 + src/fpu/divremsqrt/divremsqrtearlyterm.sv | 27 + .../divremsqrt/divremsqrtfdivsqrtpostproc.sv | 116 ++ .../divremsqrt/divremsqrtfdivsqrtpreproc.sv | 250 +++ src/fpu/divremsqrt/divremsqrtflags.sv | 183 ++ .../divremsqrt/divremsqrtintspecialcase.sv | 15 + src/fpu/divremsqrt/divremsqrtlzc.sv | 39 + src/fpu/divremsqrt/divremsqrtnormshift.sv | 81 + src/fpu/divremsqrt/divremsqrtpostprocess.sv | 177 ++ src/fpu/divremsqrt/divremsqrtround.sv | 267 +++ src/fpu/divremsqrt/divremsqrtroundsign.sv | 45 + .../divremsqrt/divremsqrtshiftcorrection.sv | 94 + src/fpu/divremsqrt/divremsqrtspecialcase.sv | 240 +++ src/fpu/divremsqrt/drsu.sv | 102 + src/fpu/divremsqrt/intrightshift.sv | 37 + testbench/testbench-fp.sv | 1682 +++++++++++++++++ .../combined_IF_vectors/create_IF_vectors.sh | 2 + 23 files changed, 4119 insertions(+) create mode 100755 bin/regression-wally-intdiv create mode 100644 src/fpu/divremsqrt/arithrightshift.sv create mode 100644 src/fpu/divremsqrt/divremsqrt.sv create mode 100644 src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv create mode 100644 src/fpu/divremsqrt/divremsqrtearlyterm.sv create mode 100644 src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv create mode 100644 src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv create mode 100644 src/fpu/divremsqrt/divremsqrtflags.sv create mode 100644 src/fpu/divremsqrt/divremsqrtintspecialcase.sv create mode 100644 src/fpu/divremsqrt/divremsqrtlzc.sv create mode 100644 src/fpu/divremsqrt/divremsqrtnormshift.sv create mode 100644 src/fpu/divremsqrt/divremsqrtpostprocess.sv create mode 
100644 src/fpu/divremsqrt/divremsqrtround.sv create mode 100644 src/fpu/divremsqrt/divremsqrtroundsign.sv create mode 100644 src/fpu/divremsqrt/divremsqrtshiftcorrection.sv create mode 100644 src/fpu/divremsqrt/divremsqrtspecialcase.sv create mode 100644 src/fpu/divremsqrt/drsu.sv create mode 100644 src/fpu/divremsqrt/intrightshift.sv create mode 100644 testbench/testbench-fp.sv diff --git a/bin/regression-wally-intdiv b/bin/regression-wally-intdiv new file mode 100755 index 000000000..5efe0dc24 --- /dev/null +++ b/bin/regression-wally-intdiv @@ -0,0 +1,561 @@ +#!/usr/bin/python3 +################################## +# +# regression-wally +# David_Harris@Hmc.edu 25 January 2021 +# Modified by Jarred Allen +# +# Run a regression with multiple configurations in parallel and exit with +# non-zero status code if an error happened, as well as printing human-readable +# output. +# +################################## +import sys,os,shutil +import multiprocessing + + + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +from collections import namedtuple +regressionDir = os.path.dirname(os.path.abspath(__file__)) +os.chdir(regressionDir) + +coverage = '-coverage' in sys.argv +fp = '-fp' in sys.argv +nightly = '-nightly' in sys.argv +softfloat = '-softfloat' in sys.argv +intdiv = '-intdiv' in sys.argv + +TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) +# name: the name of this test configuration (used in printing human-readable +# output and picking logfile names) +# cmd: the command to run to test (should include the logfile as '{}', and +# the command needs to write to that file) +# grepstr: the string to grep through the log file for. The test succeeds iff +# grep finds that string in the logfile (is used by grep, so it may +# be any pattern grep accepts, see `man 1 grep` for more info). 
+ +# edit this list to add more test cases +if (nightly): + nightMode = "-nightly"; + configs = [] +else: + nightMode = ""; + configs = [ + TestCase( + name="lints", + variant="all", + cmd="./lint-wally " + nightMode + " | tee {}", + grepstr="lints run with no errors or warnings" + ) + ] + +def getBuildrootTC(boot): + INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM + MAX_EXPECTED = 246000000 # *** TODO: replace this with a search for the login prompt. + if boot: + name="buildrootboot" + BRcmd="vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < XLEN +# ["nodcache_rv32gc", ["ahb32"]], +# ["nocache_rv32gc", ["ahb32"]], + ["noicache_rv64gc", ["ahb64"]], + ["nodcache_rv64gc", ["ahb64"]], + ["nocache_rv64gc", ["ahb64"]], + + ### add misaligned tests + + ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2i_rv64gc", ["arch64f_divsqrt", 
"arch64d_divsqrt", "arch64m"]], + ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + + ### branch predictor simulation + + # ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # 
["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # # btb + # ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + + # # ras + # ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # 
["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + +# enable floating-point tests when lint is fixed + ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], + ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]], + ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]], + ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], + ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed + ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]], + + + ] + for test in derivconfigtests: + config = test[0]; + tests = test[1]; + if(len(test) >= 4 and test[2] == "configOptions"): + configOptions = test[3] + cmdPrefix = "vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < 
/dev/null" % (text, logfile) + return os.system(grepcmd) == 0 + +def run_test_case(config): + """Run the given test case, and return 0 if the test succeeds and 1 if it fails""" + logname = "logs/"+config.variant+"_"+config.name+".log" + cmd = config.cmd.format(logname) +# print(cmd) + os.chdir(regressionDir) + os.system(cmd) + if search_log_for_text(config.grepstr, logname): + print(f"{bcolors.OKGREEN}%s_%s: Success{bcolors.ENDC}" % (config.variant, config.name)) + return 0 + else: + print(f"{bcolors.FAIL}%s_%s: Failures detected in output{bcolors.ENDC}" % (config.variant, config.name)) + print(" Check %s" % logname) + return 1 + +def main(): + """Run the tests and count the failures""" + global configs, coverage + try: + os.chdir(regressionDir) + os.mkdir("logs") + except: + pass + try: + shutil.rmtree("wkdir") + except: + pass + finally: + os.mkdir("wkdir") + + if '-makeTests' in sys.argv: + os.chdir(regressionDir) + os.system('./make-tests.sh | tee ./logs/make-tests.log') + + if '-all' in sys.argv: + TIMEOUT_DUR = 30*7200 # seconds + configs.append(getBuildrootTC(boot=True)) + elif '-buildroot' in sys.argv: + TIMEOUT_DUR = 30*7200 # seconds + configs=[getBuildrootTC(boot=True)] + elif '-coverage' in sys.argv: + TIMEOUT_DUR = 20*60 # seconds + # Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage. + # Also it is slow to run. 
+ # configs.append(getBuildrootTC(boot=False)) + os.system('rm -f cov/*.ucdb') + elif '-nightly' in sys.argv: + TIMEOUT_DUR = 60*1440 # 1 day + configs.append(getBuildrootTC(boot=False)) + elif '-softfloat' in sys.argv: + TIMEOUT_DUR = 60*60 # seconds + elif '-intdiv' in sys.argv: + TIMEOUT_DUR = 60*60 # seconds + else: + TIMEOUT_DUR = 10*60 # seconds + configs.append(getBuildrootTC(boot=False)) + + # Scale the number of concurrent processes to the number of test cases, but + # max out at a limited number of concurrent processes to not overwhelm the system + with Pool(processes=min(len(configs),multiprocessing.cpu_count())) as pool: + num_fail = 0 + results = {} + for config in configs: + results[config] = pool.apply_async(run_test_case,(config,)) + for (config,result) in results.items(): + try: + num_fail+=result.get(timeout=TIMEOUT_DUR) + except TimeoutError: + num_fail+=1 + print(f"{bcolors.FAIL}%s_%s: Timeout - runtime exceeded %d seconds{bcolors.ENDC}" % (config.variant, config.name, TIMEOUT_DUR)) + + # Coverage report + if coverage: + os.system('make coverage') + # Count the number of failures + if num_fail: + print(f"{bcolors.FAIL}Regression failed with %s failed configurations{bcolors.ENDC}" % num_fail) + else: + print(f"{bcolors.OKGREEN}SUCCESS! 
All tests ran without failures{bcolors.ENDC}") + return num_fail + +if __name__ == '__main__': + exit(main()) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 91e1d4100..445dc392f 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) +localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF)); +localparam NORMSHIFTSZDRSU = DIVb+1+NF; +localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU); + // Disable spurious Verilator warnings /* verilator lint_off STMTDLY */ diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index c80b00232..bb036c94d 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -194,6 +194,8 @@ localparam cvw_t P = '{ FMALEN : FMALEN, NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, + NORMSHIFTSZDRSU : NORMSHIFTSZDRSU, + LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU, LOGR : LOGR, RK : RK, FPDUR : FPDUR, diff --git a/src/cvw.sv b/src/cvw.sv index ed0493484..94006274b 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -285,6 +285,8 @@ typedef struct packed { int LOGCVTLEN; int NORMSHIFTSZ; int LOGNORMSHIFTSZ; + int NORMSHIFTSZDRSU; + int LOGNORMSHIFTSZDRSU; int FMALEN; // division constants diff --git a/src/fpu/divremsqrt/arithrightshift.sv b/src/fpu/divremsqrt/arithrightshift.sv new file mode 100644 index 000000000..624a54751 --- /dev/null +++ b/src/fpu/divremsqrt/arithrightshift.sv @@ -0,0 +1,9 @@ + +module arithrightshift import cvw::*; #(parameter cvw_t P) ( + input logic signed [P.INTDIVb+3:0] shiftin, + output logic signed [P.INTDIVb+3:0] shifted +); + assign shifted = $signed(shiftin) >>> P.LOGR; + +endmodule + diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv new file mode 100644 index 000000000..f57a3f8dd --- 
/dev/null +++ b/src/fpu/divremsqrt/divremsqrt.sv @@ -0,0 +1,111 @@ +/////////////////////////////////////////// +// divremsqrt.sv +// +// Written: kekim@hmc.edu +// Modified:19 May 2023 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + + + module divremsqrt import cvw::*; #(parameter cvw_t P) ( + input logic clk, + input logic reset, + input logic [P.FMTBITS-1:0] FmtE, + input logic XsE, + input logic [P.NF:0] XmE, YmE, + input logic [P.NE-1:0] XeE, YeE, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic FDivStartE, IDivStartE, + input logic StallM, + input logic FlushE, + input logic SqrtE, SqrtM, + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [2:0] Funct3E, Funct3M, + input logic IntDivE, W64E, + output logic DivStickyM, + output logic FDivBusyE, IFDivStartE, FDivDoneE, + output logic [P.NE+1:0] UeM, + output logic [P.DIVb:0] UmM, + output logic [P.XLEN-1:0] FIntDivResultM, + output logic IntDivM, + // integer normalization shifter signals + output logic [P.INTDIVb+3:0] PreResultM, + input logic [P.XLEN-1:0] PreIntResultM, + output logic [P.DIVBLEN-1:0] IntNormShiftM + +); + + // Floating-point division and square root module, with optional integer division and remainder + // Computes X/Y, sqrt(X), A/B, or A%B + + logic [P.DIVb+3:0] WS, WC; // Partial remainder components + logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend) + logic [P.DIVb+3:0] D; // Iterator Divisor + logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values + logic [P.DIVb+1:0] FirstC; // Step tracker + logic Firstun; // Quotient selection + logic WZeroE; // Early termination flag + logic [P.DURLEN:0] CyclesE; // FSM cycles + logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
+ logic DivStartE; // Enable signal for flops during stall + + // Integer div/rem signals + logic BZeroM; // Denominator is zero + logic [P.DIVBLEN:0] nM, mM; // Shift amounts + logic NegQuotM, ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM; // Special handling for postprocessor + logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor + logic ISpecialCaseE; // Integer div/remainder special cases + + + divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc( // Preprocessor + .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), + .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, + // Int-specific + .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, + .BZeroM, .AM, + .IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM); + + fdivsqrtfsm #(P) fdivsqrtfsm( // FSM + .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, + .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, + // Int-specific + .IDivStartE, .ISpecialCaseE, .IntDivE); + + fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator + .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, + .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); + + divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor + .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, + .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, + .UmM, .WZeroE, .DivStickyM, + // Int-specific + .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .FIntDivResultM, .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM); + + +endmodule + diff --git a/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv new file mode 100644 index 000000000..640735bef --- /dev/null +++ b/src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv @@ -0,0 +1,73 @@ +/////////////////////////////////////////// +// divshiftcalc.sv +// +// Written: 
me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Division shift calculation +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module divremsqrtdivshiftcalc import cvw::*; #(parameter cvw_t P) ( + input logic [P.NF+2:0] DivUm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent + output logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt, // divsqrt shift amount + output logic [P.NORMSHIFTSZDRSU-1:0] DivShiftIn, // divsqrt shift input + output logic DivResSubnorm, // is the divsqrt result subnormal + output logic DivSubnormShiftPos // is the subnormal shift amount positive +); + + logic [P.LOGNORMSHIFTSZDRSU-1:0] NormShift; // normalized result shift amount + logic [P.LOGNORMSHIFTSZDRSU-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negative) + logic [P.NE+1:0] DivSubnormShift; // subnormal result shift amount + + // is the result subnormal + // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes + assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]); + + // if the result is subnormal + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1 + // .0000xxxxxxxxxxx... >> 1 Exp = 1 + // Left shift amount = DivUe+NF+1-1 + assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe; + assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; + + // if the result is normalized + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivUe+1 + // 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? 
Exp = DivUe-1 (determined after) + // initial Left shift amount = NF + // shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit + assign NormShift = (P.LOGNORMSHIFTSZDRSU)'(P.NF); + + // if the shift amount is negative then don't shift (keep sticky bit) + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZDRSU-1:0] : 0; + assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; + + // pre-shift the divider result for normalization + assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF{1'b0}}}; +endmodule diff --git a/src/fpu/divremsqrt/divremsqrtearlyterm.sv b/src/fpu/divremsqrt/divremsqrtearlyterm.sv new file mode 100644 index 000000000..3d9715ed4 --- /dev/null +++ b/src/fpu/divremsqrt/divremsqrtearlyterm.sv @@ -0,0 +1,27 @@ +module divremsqrtearlyterm import cvw::*; #(parameter cvw_t P) ( + input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+3:0] D, // Q4.DIVb + input logic [P.DIVb:0] FirstUM, // U1.DIVb + input logic [P.DIVb+1:0] FirstC, // Q2.DIVb + input logic Firstun, SqrtE, + output logic WZeroE +); + logic weq0E; + aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E); + if (P.RADIX == 2) begin: R2EarlyTerm + logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE; + logic [P.DIVb+2:0] FirstK; + logic wfeq0E; + logic [P.DIVb+3:0] WCF, WSF; + + assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1)); + assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root + assign FZeroDivE = D << 1; // F for divide + mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE); + csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; + aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E); + assign WZeroE = weq0E|wfeq0E; + end else begin + assign WZeroE = weq0E; + end +endmodule 
diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv new file mode 100644 index 000000000..e1c152227 --- /dev/null +++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv @@ -0,0 +1,116 @@ +/////////////////////////////////////////// +// fdivsqrtpostproc.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Divide/Square root postprocessing +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module divremsqrtfdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( + input logic clk, reset, + input logic StallM, + input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+3:0] D, // Q4.DIVb + input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb + input logic [P.DIVb+1:0] FirstC, // Q2.DIVb + input logic SqrtE, + input logic Firstun, SqrtM, SpecialCaseM, + input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0) + input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM, + input logic [P.DIVBLEN-1:0] IntNormShiftM, + input logic [P.XLEN-1:0] PreIntResultM, + output logic [P.DIVb:0] UmM, // U1.DIVb result significand + output logic WZeroE, + output logic DivStickyM, + output logic [P.XLEN-1:0] FIntDivResultM, // U/Q(XLEN.0) + output logic [P.INTDIVb+3:0] PreResultM + +); + + logic [P.DIVb+3:0] Sum; + logic [P.INTDIVb+3:0] W; + logic [P.DIVb:0] PreUmM; + logic NegStickyM; + logic weq0E, WZeroM; + logic [P.XLEN-1:0] IntDivResultM; + logic NegQuotM; // Integer quotient is negative + + ////////////////////////// + // Execute Stage: Detect early termination for an exact result + ////////////////////////// + + // check for early termination on an exact result. 
+ divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS,.Firstun, .WZeroE); + + + ////////////////////////// + // E/M Pipeline register + ////////////////////////// + + flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM); + + ////////////////////////// + // Memory Stage: Postprocessing + ////////////////////////// + + // If the result is not exact, the sticky should be set + assign DivStickyM = ~WZeroM & ~SpecialCaseM; + + // Determine if sticky bit is negative *** Full sum only needed for Integer + assign Sum = WC + WS; + assign NegStickyM = Sum[P.DIVb+3]; + mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit + mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM); + + // Integer quotient or remainder correction, normalization, and special cases + if (P.IDIV_ON_FPU) begin:intpostproc // Int supported + logic [P.INTDIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; + logic signed [P.INTDIVb+3:0] PreResultM, PreResultShiftedM, PreIntResultM; + logic [P.INTDIVb+3:0] DTrunc, SumTrunc; + + assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb]; + assign DTrunc = D[P.DIVb+3:P.DIVb-P.INTDIVb]; + arithrightshift #(P) rshift(SumTrunc, W); + + assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]}; + + // Integer remainder: sticky and sign correction muxes + assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative + mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM); + + // Select quotient or remainder and do normalization shift + mux2 #(P.INTDIVb+4) presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM); + intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM); + mux2 #(P.INTDIVb+4) preintresultmux(PreResultShiftedM, -PreResultShiftedM,AsM ^ (BsM&~RemOpM), PreIntResultM); + + divremsqrtintspecialcase #(P) intspecialcase(BZeroM,RemOpM, ALTBM,AM,PreIntResultM,IntDivResultM); + // sign extend result for W64 + if 
(P.XLEN==64) begin + mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0], + {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64 + W64M, FIntDivResultM); + end else + assign FIntDivResultM = IntDivResultM[P.XLEN-1:0]; + end +endmodule diff --git a/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv new file mode 100644 index 000000000..61f9931ab --- /dev/null +++ b/src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv @@ -0,0 +1,250 @@ +/////////////////////////////////////////// +// fdivsqrtpreproc.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtfdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 IFDivStartE,                    // passed to every flopen register below (presumably the load enable)
+  input  logic [P.NF:0]        Xm, Ym,                         // Floating-point significands
+  input  logic [P.NE-1:0]      Xe, Ye,                         // Floating-point exponents
+  input  logic [P.FMTBITS-1:0] FmtE,
+  input  logic                 SqrtE,
+  input  logic                 XZeroE,
+  input  logic [2:0]           Funct3E,                        // bit 0 clear selects signed division; bit 1 selects remainder
+  output logic [P.NE+1:0]      UeM,                            // biased exponent of result
+  output logic [P.DIVb+3:0]    X, D,                           // Q4.DIVb
+  // Int-specific
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
+  input  logic                 IntDivE, W64E,
+  // Outputs
+  output logic                 ISpecialCaseE,
+  output logic [P.DURLEN:0]    CyclesE,
+  output logic [P.DIVBLEN-1:0] IntNormShiftM,
+  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM, // NOTE(review): SIGNOVERFLOWM and ZeroDiffM are never driven in this module
+  output logic                 AsM, BsM, BZeroM,
+  output logic [P.XLEN-1:0]    AM
+);
+
+  logic [P.DIVb:0]             Xnorm, Dnorm;
+  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
+  logic [P.NE+1:0]             UeE;                                 // Result Exponent (FP only)
+  logic [P.DIVb:0]             IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
+  logic [P.DIVBLEN-1:0]        mE, ell;                             // Leading zeros of inputs
+  logic [P.DIVBLEN-1:0]        IntResultBitsE;                      // bits in integer result
+  logic                        NumerZeroE;                          // Numerator is zero (X or A)
+  logic                        SIGNOVERFLOWE;                       // tied to 0 below; not registered to the M stage
+  logic                        AZeroE, BZeroE;                      // A or B is Zero for integer division
+  logic                        SignedDivE;                          // signed division
+  logic                        AsE, BsE;                            // Signs of integer inputs
+  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic                        ALTBE;
+  logic                        EvenExp;
+
+  logic [$clog2(P.RK):0]       RightShiftX;
+  logic [P.DIVBLEN-1:0]        ZeroDiff, p;
+
+
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
+
+  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
+    logic [P.XLEN-1:0] BE, PosA, PosB;
+
+    // Extract inputs, signs, zero, depending on W64 mode if applicable
+    assign SignedDivE = ~Funct3E[0];
+
+    // Source handling
+    if (P.XLEN==64) begin // 64-bit, supports W64
+      mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
+    end else begin // 32 bits only
+      assign AE = ForwardedSrcAE;
+      assign BE = ForwardedSrcBE;
+    end
+    assign AZeroE = ~(|AE);
+    assign BZeroE = ~(|BE);
+    assign AsE = AE[P.XLEN-1] & SignedDivE;
+    assign BsE = BE[P.XLEN-1] & SignedDivE;
+
+    // Force integer inputs to be positive
+    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
+    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
+
+    // Select integer or floating point inputs
+    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
+    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
+    mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+  end else begin // Int not supported
+    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
+    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
+    assign NumerZeroE = XZeroE;
+  end
+
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
+  // count leading zeros for Subnorm FP and to normalize integer inputs
+  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
+  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);
+
+  // Normalization shift: shift leading one into most significant bit
+  assign Xnorm = (IFX << ell);
+  assign Dnorm = (IFD << mE);
+
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  // Determine DivXShifted (X shifted to digit boundary)
+  // and nE (number of fractional digits)
+  //////////////////////////////////////////////////////
+
+  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
+
+  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
+
+    // calculate number of result bits
+    assign ZeroDiff = mE - ell; // Difference in number of leading zeros
+    assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros)
+    assign SIGNOVERFLOWE = 1'b0;
+
+    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
+
+    /* verilator lint_off WIDTH */
+    assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
+
+    /* verilator lint_on WIDTH */
+
+    // Integer special cases (terminate immediately)
+    assign ISpecialCaseE = BZeroE | ALTBE;
+
+    // calculate right shift amount RightShiftX to complete in discrete number of steps
+    if (P.RK > 1) begin // more than 1 bit per cycle
+
+      /* verilator lint_off WIDTH */
+      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
+      assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
+      /* verilator lint_on WIDTH */
+    end else begin // radix 2 1 copy doesn't require shifting
+      assign DivXShifted = DivX;
+      assign RightShiftX = 0;
+    end
+  end else begin
+    assign ISpecialCaseE = 0;
+  end
+
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // Extend to Q4.b format
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
+  //////////////////////////////////////////////////////
+
+
+  // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
+  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
+  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
+  // Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
+  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
+  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
+  // Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
+  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
+  // Radix      Exponent odd          Exponent Even
+  // 2           x-2 = 2(x/2 - 1)     x/2 - 2 = 2(x/4 - 1)
+  // 4           2(x)-4 = 4(x/2 - 1)  2(x/2)-4 = 4(x/4 - 1)
+  // Summary: PreSqrtX = r(x/2or4 - 1)
+
+  logic [P.DIVb:0] PreSqrtX; // NOTE(review): only referenced inside the commented-out block below
+  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
+  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even
+
+/*
+  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
+  // This saves one bit in DIVb because there is no initial right shift.
+  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
+  // That is an optimization for another day.
+  if (P.RADIX == 2) begin
+    logic [P.DIVb:0] PreSqrtX; // U1.DIVb
+    mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
+    assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
+  end else begin
+    logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
+    mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
+    assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
+  end
+*/
+
+  // Initialize X for division or square root
+  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
+
+  //////////////////////////////////////////////////////
+  // Select integer or floating-point operands
+  //////////////////////////////////////////////////////
+  if (P.IDIV_ON_FPU) begin
+    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
+
+  // Divisor register
+  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
+
+  // Floating-point exponent
+  fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
+  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
+
+  // Number of FSM cycles (to FSM)
+  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
+
+  if (P.IDIV_ON_FPU) begin:intpipelineregs
+    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
+    logic RemOpE;
+
+    /* verilator lint_off WIDTH */
+    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
+    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
+    /* verilator lint_on WIDTH */
+    assign RemOpE = Funct3E[1];
+    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
+
+    // pipeline registers
+    flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
+    flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
+    flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
+    flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
+    flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
+    flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
+    flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
+    if (P.XLEN==64)
+      flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
+  end
+
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..dc480637b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,183 @@
+
+///////////////////////////////////////////
+// divremsqrtflags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied.
See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtflags import cvw::*; #(parameter cvw_t P) (
+  input  logic                 Xs,                   // X sign
+  input  logic [P.FMTBITS-1:0] OutFmt,               // output format
+  input  logic                 InfIn,                // is an Inf input being used
+  input  logic                 XInf, YInf,           // inputs are infinity
+  input  logic                 NaNIn,                // is a NaN input being used
+  input  logic                 XSNaN, YSNaN,         // inputs are signaling NaNs
+  input  logic                 XZero, YZero,         // inputs are zero
+  input  logic [P.NE+1:0]      FullRe,               // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                   // exponent of the normalized result
+  // rounding
+  input  logic                 Plus1,                // do you add one for rounding (not referenced in this module)
+  input  logic                 Round, Guard, Sticky, // bits used to determine rounding
+  input  logic                 UfPlus1,              // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                 DivOp,                // divide/sqrt operation? (gates DivInvalid and DivByZero)
+  input  logic                 Sqrt,                 // Sqrt?
+  // flags
+  output logic                 DivByZero,            // divide by zero flag
+  output logic                 Overflow,             // overflow flag to select result
+  output logic                 Invalid,              // invalid flag to select the result
+  output logic [4:0]           PostProcFlg           // flags: {Invalid, DivByZero, Overflow, Underflow, Inexact}
+);
+
+  logic                        SigNaN;               // is an input a signaling NaN
+  logic                        Inexact;              // final inexact flag
+  logic                        FpInexact;            // floating point inexact flag
+  logic                        DivInvalid;           // invalid flag for divide/sqrt
+  logic                        Underflow;            // Underflow flag
+  logic                        ResExpGteMax;         // is the result greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent or
+  // the shift amount is greater than the integers size (for cvt to int)
+  // ShiftGtIntSz calculation:
+  //      a left shift of intlen+1 is still in range but any more than that is an overflow
+  //             initial: |      64 0's         |    XLEN     |
+  //                      |      64 0's         |    XLEN     | << 64
+  //                      |      XLEN     |     00000...      |
+  //      65 = ...0 0 0 0 0 1 0 0 0 0 0 1
+  //           |  or  | |     or      |
+  //      33 = ...0 0 0 0 0 0 1 0 0 0 0 1
+  //           |  or    | |     or    |
+  //      larger or equal if:
+  //          - any of the bits after the most significant 1 is one
+  //          - the most significant in 65 or 33 is still a one in the number and
+  //            one of the later bits is one
+  if (P.FPSIZES == 1) begin
+    assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+
+  end else if (P.FPSIZES == 2) begin
+    assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+        P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+        P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
+        default: ResExpGteMax = 1'bx;
+      endcase
+
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt) // NOTE(review): no default branch here, unlike the FPSIZES == 3 case above
+        P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+        P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+        P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+        P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
+      endcase
+  end
+
+
+  // calculate overflow flag:
+  //                 if the result is greater than or equal to the max exponent (not taking into account sign)
+  //                 |              and the exponent isn't negative
+  //                 |              |                 if the input isn't infinity or NaN (and this is not a divide by zero)
+  //                 |              |                 |
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //                   the exponent is negative
+  //                   |                the result is subnormal
+  //                   |                |               the result is normal and rounded from a Subnorm
+  //                   |                |               |                            and if given an unbounded exponent the result does not round
+  //                   |                |               |                            |                  and if the result is not exact
+  //                   |                |               |                            |                  |                         and if the input isn't infinity or NaN
+  //                   |                |               |                            |                  |                         |
+  //assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - suppressed when an input is Inf/NaN/zero-X, on divide by zero, and when Invalid is set
+  //assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
+
+  //                  if the res is too small to be represented and not 0
+  //                  |                                     and if the res is not invalid (outside the integer bounds)
+  //                  |                                     |
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) any input is a signaling NaN
+  //   2) division of Inf / Inf or 0 / 0
+  //   3) square root of a negative X that is neither zero nor accompanied by a NaN input
+
+
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+
+  //invalid flag for division and square root
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0
+  //  - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - to integer results do not set the underflow or overflow flags
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtintspecialcase.sv b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
new file mode 100644
index 000000000..f15f2d075
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtintspecialcase.sv
@@ -0,0 +1,15 @@
+module divremsqrtintspecialcase import cvw::*; #(parameter cvw_t P) ( // selects the integer divide/remainder result, overriding it in the special cases
+  input  logic                       BZeroM,RemOpM, ALTBM, // divisor is zero, remainder operation, numerator is small
+  input  logic [P.XLEN-1:0]          AM,                   // returned unchanged for remainder ops in both special cases
+  input  signed [P.INTDIVb+3:0]      PreIntResultM,        // result used when no special case applies (low XLEN bits)
+  output logic [P.XLEN-1:0]          IntDivResultM         // selected integer result
+);
+always_comb
+  if (BZeroM) begin // Divide by zero
+    if (RemOpM) IntDivResultM = AM;                 // remainder: AM
+    else        IntDivResultM = {(P.XLEN){1'b1}};   // quotient: all ones
+  end else if (ALTBM) begin // Numerator is small
+    if (RemOpM) IntDivResultM = AM;                 // remainder: AM
+    else        IntDivResultM = 0;                  // quotient: zero
+  end else      IntDivResultM = PreIntResultM[P.XLEN-1:0]; // normal case: truncate to XLEN bits
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtlzc.sv b/src/fpu/divremsqrt/divremsqrtlzc.sv
new file mode 100644
index 000000000..1fa14405b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtlzc.sv
@@ -0,0 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Leading Zero Counter
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied.
See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtlzc #(parameter WIDTH = 1) (
+  input  logic [WIDTH-1:0]            num,    // number to count the leading zeroes of
+  output logic [$clog2(WIDTH)-1:0]    ZeroCnt // the number of leading zeroes, truncated to $clog2(WIDTH) bits
+);
+
+  integer i; // running count of leading zeroes, scanned from the most significant bit down
+
+  always_comb begin
+    i = 0;
+    while ((i < WIDTH) & ~num[WIDTH-1-i]) i = i+1;  // search for leading one; stops at i == WIDTH when num is all zeroes
+    ZeroCnt = i[$clog2(WIDTH)-1:0]; // keeps only the low bits: a count of WIDTH does not fit when WIDTH is a power of two
+  end
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtnormshift.sv b/src/fpu/divremsqrt/divremsqrtnormshift.sv
new file mode 100644
index 000000000..4fc51b4ad
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtnormshift.sv
@@ -0,0 +1,81 @@
+///////////////////////////////////////////
+// divremsqrtnormshift.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: normalization shifter
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+  // convert shift (NOTE(review): this module has no operation select; it only left-shifts ShiftIn by ShiftAmt)
+  //      fp -> int: |  `XLEN  zeros |     Mantissa      | 0's if necessary | << CalcExp
+  //          process:
+  //              - start - CalcExp = 1 + XExp - Largest Bias
+  //                  |  `XLEN  zeros |     Mantissa      | 0's if necessary |
+  //
+  //              - shift left 1 (1)
+  //                  |   `XLEN-1 zeros   |bit|     frac      | 0's if necessary |
+  //                                      . <- binary point
+  //
+  //              - shift left till unbiased exponent is 0 (XExp - Largest Bias)
+  //                  |   0's     |     Mantissa      | 0's if necessary |
+  //                  |     keep          |
+  //
+  //      fp -> fp:
+  //          - if result is subnormal or underflowed:
+  //              |  `NF-1  zeros   |     Mantissa      | 0's if necessary | << NF+CalcExp-1
+  //          process:
+  //             - start
+  //                 |     mantissa      | 0's |
+  //
+  //             - shift right by NF-1 (NF-1)
+  //                 |  `NF-1  zeros   |     mantissa      | 0's |
+  //
+  //             - shift left by CalcExp = XExp - Largest bias + new bias
+  //                 |   0's     |     mantissa      | 0's       |
+  //                 |         keep          |
+  //
+  //          - if the input is subnormal:
+  //              |     lzcIn      | 0's if necessary | << ZeroCnt+1
+  //              - plus 1 to shift out the first 1
+  //
+  //      int -> fp: |     lzcIn      | 0's if necessary | << ZeroCnt+1
+  //              - plus 1 to shift out the first 1
+
+  // fma shift
+  //      | 00 |    Sm     | << LZA output
+  //      .
+  //          - two extra bits so we can correct for an LZA error of 1 or 2
+
+  // divsqrt shift
+  //      | Nf 0's  |  Qm  | << calculated shift amount
+  //      .
+
+module divremsqrtnormshift import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt, // shift amount
+  input  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn,  // number to be shifted
+  output logic [P.NORMSHIFTSZDRSU-1:0]    Shifted   // shifted result
+);
+
+  assign Shifted = ShiftIn << ShiftAmt; // logical left shift; bits shifted past the MSB are dropped
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..661e48c81
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,177 @@
+///////////////////////////////////////////
+// divremsqrtpostprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtpostprocess import cvw::*; #(parameter cvw_t P) (
+  // general signals
+  input  logic                      Xs, Ys,        // input signs
+  input  logic [P.NF:0]             Xm, Ym,        // input mantissas
+  input  logic [2:0]                Frm,           // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
+  input  logic [P.FMTBITS-1:0]      Fmt,           // precision 1 = double 0 = single
+  input  logic [3:0]                OpCtrl,        // choose which operation (only bit 0, the Sqrt select, is read here)
+  input  logic                      XZero, YZero,  // inputs are zero
+  input  logic                      XInf, YInf,    // inputs are infinity
+  input  logic                      XNaN, YNaN,    // inputs are NaN
+  input  logic                      XSNaN, YSNaN,  // inputs are signaling NaNs
+  input  logic [1:0]                PostProcSel,   // select result to be written to fp register
+  //fma signals
+  //divide signals
+  input  logic                      DivSticky,     // divider sticky bit
+  input  logic [P.NE+1:0]           DivUe,         // divsqrt exponent
+  input  logic [P.NF+2:0]           DivUm,         // divsqrt significand
+  input  logic [P.DIVBLEN-1:0]      IntNormShiftM, // integer normalization left-shift amount (after pre-shifting right); not referenced in this module
+  input  logic [P.INTDIVb+3:0]      PreResultM,    // integer result to be shifted; not referenced in this module
+  input  logic                      IntDivM,       // not referenced in this module
+  // final results
+  output logic [P.FLEN-1:0]         PostProcRes,// postprocessor final result
+  output logic [4:0]                PostProcFlg,   // postprocessor flags
+  output logic [P.XLEN-1:0]         PreIntResultM  // normalized integer result; NOTE(review): never driven in this module
+  );
+
+
+  // general signals
+  logic                             Rs;            // result sign
+  logic [P.NF-1:0]                  Rf;            // Result fraction
+  logic [P.NE-1:0]                  Re;            // Result exponent
+  logic                             Ms;            // norMalized sign
+  logic [P.NORMSHIFTSZDRSU-1:0]     Mf;            // norMalized fraction
+  logic [P.NE+1:0]                  Me;            // normalized exponent
+  logic [P.NE+1:0]                  FullRe;        // Re with bits to determine sign and overflow
+  logic                             UfPlus1;       // do you add one (for determining underflow flag)
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]  ShiftAmt;      // normalization shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]     ShiftIn;       // input to normalization shift
+  logic [P.NORMSHIFTSZDRSU-1:0]     Shifted;       // the output of the normalized shifter (before shift correction)
+  logic                             Plus1;         // add one to the final result?
+  logic                             Overflow;      // overflow flag used to select results
+  logic                             Invalid;       // invalid flag used to select results
+  logic                             Guard, Round, Sticky; // bits needed to determine rounding
+  logic [P.FMTBITS-1:0]             OutFmt;        // output format
+  // division signals
+  logic [P.LOGNORMSHIFTSZDRSU-1:0]  DivShiftAmt;   // divsqrt shift amount
+  logic [P.NORMSHIFTSZDRSU-1:0]     DivShiftIn;    // divsqrt shift input
+  logic [P.NE+1:0]                  Ue;            // divsqrt corrected exponent after correction shift
+  logic                             DivByZero;     // divide by zero flag
+  logic                             DivResSubnorm; // is the divsqrt result subnormal
+  logic                             DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+  // conversion signals (declared but not referenced by any live statement in this module)
+  logic [P.CVTLEN+P.NF:0]           CvtShiftIn;    // number to be shifted for converter
+  logic [1:0]                       CvtNegResMsbs; // most significant bits of possibly negated int result
+  logic [P.XLEN+1:0]                CvtNegRes;     // possibly negated integer result
+  logic                             CvtResUf;      // did the convert result underflow
+  logic                             IntInvalid;    // invalid integer flag
+  // readability signals
+  logic                             Mult;          // multiply operation (unused here)
+  logic                             Sqrt;          // is the divsqrt operation sqrt
+  logic                             Int64;         // is the integer 64 bits? (unused here)
+  logic                             Signed;        // is the operation with a signed integer? (unused here)
+  logic                             IntToFp;       // is the operation an int->fp conversion? (only in commented-out code)
+  logic                             CvtOp;         // conversion operation (only in commented-out code)
+  logic                             DivOp;         // divider operation
+  logic                             InfIn;         // are any of the inputs infinity
+  logic                             NaNIn;         // are any of the inputs NaN
+
+  // signals to help readability
+
+  assign DivOp = (PostProcSel == 2'b01); // NOTE(review): not read below; every submodule instance is passed .DivOp(1'b1)
+  assign Sqrt = OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf;
+  assign NaNIn = XNaN|YNaN;
+
+  // choose the output format depending on the operation
+  //      - the conversion-dependent selections are commented out below
+  //      - Fmt always supplies the precision of the output (OutFmt is left undriven when P.FPSIZES == 1)
+  if (P.FPSIZES == 2)
+    //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
+    assign OutFmt = Fmt;
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4)
+    //assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
+    assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting
+
+  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  assign ShiftAmt = DivShiftAmt;
+  assign ShiftIn = DivShiftIn;
+
+  // main normalization shift
+  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // correct for LZA/divsqrt error
+  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1), .DivUe, .Ue, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+
+  // calculate result sign used in rounding unit
+  divremsqrtroundsign #(P) roundsign( .DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);
+
+  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
+      .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+
+  assign Rs = Ms; // result sign is the normalized sign from roundsign, unchanged
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
+              .Xs, .OutFmt, .Sqrt,
+              .NaNIn, .Round, .DivByZero,
+              .Guard, .Sticky, .UfPlus1,.DivOp(1'b1), .FullRe, .Plus1,
+              .Me, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
+      .Frm, .OutFmt, .XNaN, .YNaN,
+      .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+      .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..7a7e46964
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,267 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu, me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// divremsqrtround: picks the Lsb/Guard/Round bits of Mf for the output format, builds the sticky bit,
+// decides Plus1 from the rounding mode and adds it to {exponent, fraction} to form FullRe/Re/Rf.
+module divremsqrtround import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0] OutFmt, // output format
+  input  logic [2:0] Frm, // rounding mode
+  input  logic Ms, // normalized sign
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Mf, // normalized fraction
+  // divsqrt
+  input  logic DivOp, // is a divsqrt operation being done (gates DivSticky)
+  input  logic DivSticky, // divsqrt sticky bit
+  input  logic [P.NE+1:0] Ue, // the divsqrt calculated exponent
+  // outputs
+  output logic [P.NE+1:0] Me, // exponent fed to the rounding adder (driven directly from Ue)
+  output logic UfPlus1, // do you add one to the result if given an unbounded exponent
+  output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0] Re, // Result exponent (low NE bits of FullRe)
+  output logic [P.NF-1:0] Rf, // Result fraction
+  output logic Sticky, // sticky bit
+  output logic Plus1, // do you add one to the final result
+  output logic Round, Guard // bits needed to calculate rounding
+);
+
+  logic UfCalcPlus1; // calculated plus one for unbounded exponent
+  logic NormSticky; // OR of the Mf bits from the Round position downward (for the selected format)
+  logic [P.NF-1:0] RoundFrac; // top NF bits of Mf, i.e. the fraction before rounding
+  logic FpGuard, FpRound; // floating point round/guard bits
+  logic FpLsbRes; // least significant bit of floating point result
+  logic LsbRes; // lsb of result
+  logic CalcPlus1; // calculated plus1
+  logic FpPlus1; // do you add one to the fp result
+  logic [P.FLEN:0] RoundAdd; // how much to add to the result
+
+// what position is XLEN in?
+// options:
+// 1: XLEN > NF > NF1
+// 2: NF > XLEN > NF1
+// 3: NF > NF1 > XLEN
+// single and double will always be smaller than XLEN
+// NOTE: XLEN is not referenced by any logic in this module; the options above do not select anything here.
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+  // NOTE: the "small number was supposed to be subtracted" cases below have no corresponding input in this module.
+  // round to nearest even
+  // {Round, Sticky}
+  // 0x - do nothing
+  // 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
+  // - don't add 1 if a small number was supposed to be subtracted
+  // 11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  // - plus 1 otherwise
+
+  // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  // round to -infinity
+  // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  // round to infinity
+  // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  // round to nearest max magnitude
+  // {Guard, Round, Sticky}
+  // 0x - do nothing
+  // 10 - tie - Plus1
+  // - don't add 1 if a small number was supposed to be subtracted
+  // 11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  // - Plus 1 otherwise
+
+
+  // the result is always treated as floating point here: Guard/Round/LsbRes are driven straight from the Fp* signals
+
+  // sticky bit calculation
+  if (P.FPSIZES == 1) begin
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 2) begin // extra bits between the NF1 and NF positions count only when OutFmt is 0
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+
+  end else if (P.FPSIZES == 3) begin
+
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 4) begin // NOTE(review): unlike the FPSIZES==2/3 branches, the lowest term is gated by OutFmt==P.Q_FMT, so bits below the Q_NF position never reach NormSticky for H/S/D outputs - confirm this is intended
+    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.S_FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&((OutFmt==P.D_FMT))) |
+                        (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]&(OutFmt==P.Q_FMT));
+  end
+
+
+
+  // the divsqrt sticky bit is included only for a divsqrt operation (DivOp)
+  // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
+  //assign Sticky = DivSticky&DivOp | NormSticky | StickySubnorm;
+  assign Sticky = DivSticky&DivOp | NormSticky;
+  //assign Sticky = DivSticky&DivOp;
+
+
+
+  // for a format with fraction width W (NF, NF1, ...): Lsb = Mf[SZ-W], Guard = Mf[SZ-W-1], Round = Mf[SZ-W-2], SZ = P.NORMSHIFTSZDRSU
+  // determine round and LSB of the rounded value
+  // - underflow round bit is used to determine the underflow flag
+  if (P.FPSIZES == 1) begin
+    assign FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+    assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+    assign FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+
+  end else if (P.FPSIZES == 2) begin // OutFmt set selects the NF positions, clear selects the NF1 positions
+    assign FpGuard = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+    assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF] : Mf[P.NORMSHIFTSZDRSU-P.NF1];
+    assign FpRound = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
+        end
+        P.FMT1: begin
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
+        end
+        P.FMT2: begin
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
+        end
+        default: begin // unused format encoding: drive x
+          FpGuard = 1'bx;
+          FpLsbRes = 1'bx;
+          FpRound = 1'bx;
+        end
+      endcase
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        2'h3: begin // uses the Q_NF positions
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
+        end
+        2'h1: begin // uses the D_NF positions
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
+        end
+        2'h0: begin // uses the S_NF positions
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
+        end
+        2'h2: begin // uses the H_NF positions
+          FpGuard = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
+          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
+          FpRound = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
+        end
+      endcase
+  end
+
+
+  assign Guard = FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round = FpRound;
+
+
+  always_comb begin
+    // Determine if you add 1
+    case (Frm)
+      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+      3'b001: CalcPlus1 = 0;//round to zero
+      3'b010: CalcPlus1 = Ms;//round down
+      3'b011: CalcPlus1 = ~Ms;//round up
+      3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+      default: CalcPlus1 = 1'bx;
+    endcase
+    // Determine if you add 1 (for underflow flag): same decision one bit lower, Round takes the Guard role and Guard the Lsb role
+    case (Frm)
+      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+      3'b001: UfCalcPlus1 = 0;//round to zero
+      3'b010: UfCalcPlus1 = Ms;//round down
+      3'b011: UfCalcPlus1 = ~Ms;//round up
+      3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+      default: UfCalcPlus1 = 1'bx;
+    endcase
+
+  end
+
+  // If an answer is exact (no Guard/Round/Sticky bits set) don't round
+  assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format (the lsb position of the selected format's fraction)
+  if (P.FPSIZES == 1) begin
+    assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
+
+  end else if (P.FPSIZES == 2) begin
+    // \/FLEN+1
+    // | NE+2 | NF |
+    // '-NE+2-^----NF1----^
+    // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+    assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (P.FPSIZES == 3) begin
+    assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
+
+  end else if (P.FPSIZES == 4)
+    assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction (keep the top NF bits of Mf)
+  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];
+
+
+
+  // select the exponent (only the divsqrt exponent exists in this module)
+  assign Me = Ue;
+
+
+
+  // round the result
+  // - if the fraction overflows one should be added to the exponent (the carry ripples from Rf into FullRe)
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[P.NE-1:0];
+
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..b0dd4270b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,45 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+//
+// Documentation: RISC-V System on Chip Design
Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
+  input  logic Xs,    // sign of operand X
+  input  logic Ys,    // sign of operand Y (ignored when Sqrt is set)
+  input  logic Sqrt,  // set for a square root operation
+  input  logic DivOp, // set for a divsqrt operation; gates the output
+  output logic Ms     // sign handed to the rounding logic
+);
+
+  logic QuotSign; // sign of the divsqrt result
+
+  // X's sign, flipped by Y's sign unless this is a square root
+  assign QuotSign = (Ys & ~Sqrt) ^ Xs;
+
+  // forward the sign only for a divsqrt operation, otherwise drive 0
+  assign Ms = DivOp & QuotSign;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..6b119ed6c
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,94 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// divremsqrtshiftcorrection: shifts the normalized divsqrt result left by one or two more bits (unless it is subnormal) and adjusts the exponent to match.
+module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.NORMSHIFTSZDRSU-1:0] Shifted, // the shifted divsqrt result before the one/two bit correction
+  // divsqrt
+  input  logic DivOp, // is it a divsqrt operation (NOTE: only referenced by commented-out code in this module)
+  input  logic DivResSubnorm, // is the divsqrt result subnormal
+  input  logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
+  input  logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input logic FmaOp, // is it an fma opperation
+  //input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
+  //input logic FmaSZero,
+  // output
+  //output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
+  output logic [P.NORMSHIFTSZDRSU-1:0] Mf, // the corrected result: CorrQmShifted, or Shifted unchanged when DivResSubnorm
+  output logic [P.NE+1:0] Ue // corrected exponent for divider
+);
+
+  logic [P.NORMSHIFTSZDRSU-1:0] CorrQm0, CorrQm1; // Shifted moved left by two bits (CorrQm0) or one bit (CorrQm1), zero filled
+  logic [P.NORMSHIFTSZDRSU-1:0] CorrQmShifted; // the shifted divsqrt result after the one or two bit shift
+  logic ResSubnorm; // is the result Subnormal (NOTE: never driven; only referenced by commented-out FMA code)
+  logic LZAPlus1; // msb of Shifted; when set the exponent is not decremented
+  logic LeftShiftQm; // select for divcorrmux between CorrQm0 and CorrQm1
+
+  // LZA correction
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // correct the shifting error caused by the LZA
+  // - the only possible mantissa for a plus two is all zeroes
+  // - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+  //mux2 #(P.NORMSHIFTSZDRSU-2) lzacorrmux(Shifted[P.NORMSHIFTSZDRSU-3:0], Shifted[P.NORMSHIFTSZDRSU-2:1], LZAPlus1, CorrSumShifted);
+
+  // correct the shifting of the divsqrt caused by producing a result in the [.5, 2) range
+  // condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1)); // == binds tighter than &, so this is LZAPlus1 | ((DivUe==1) & ~LZAPlus1)
+  //assign LeftShiftQm = ((DivUe==1));
+  assign CorrQm0 = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}};
+  assign CorrQm1 = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};
+  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); // presumably picks CorrQm1 when LeftShiftQm is set (mux2 is defined elsewhere - confirm)
+
+  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
+  always_comb
+    //if(FmaOp) Mf = {CorrSumShifted, {P.NORMSHIFTSZDRSU-(3*P.NF+4){1'b0}}};
+    //if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
+    if (~DivResSubnorm) Mf = CorrQmShifted;
+    else Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];
+
+  // Determine sum's exponent (FMA notes kept for reference; the FmaMe logic below is commented out)
+  // main exponent issues:
+  // - LZA was one too large
+  // - LZA was two too large
+  // - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  // - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  // if plus1 If plus2 kill if the result Zero or actually subnormal
+  // | | |
+  //assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
+
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZDRSU-2]&~Shifted[P.NORMSHIFTSZDRSU-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
+  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; // exponent forced to 0 for a subnormal result with a positive shift amount
+  //assign Ue = (DivResSubnorm ) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..d7f569add
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,240 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// divremsqrtspecialcase: builds the NaN, invalid, overflow, underflow and normal result encodings for the output format and selects one by priority.
+module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
+  input  logic Xs, // X sign (NOTE: not referenced by any logic in this module)
+  input  logic [P.NF:0] Xm, Ym, // input significands (supply the NaN payload bits)
+  input  logic XNaN, YNaN, // are the inputs NaN
+  input  logic [2:0] Frm, // rounding mode
+  input  logic [P.FMTBITS-1:0] OutFmt, // output format
+  input  logic InfIn, // are any inputs infinity
+  input  logic NaNIn, // are any input NaNs (only used when P.IEEE754 is 0)
+  input  logic XInf, YInf, // are X or Y infinity
+  input  logic XZero, // is X zero
+  input  logic Plus1, // do you add one for rounding
+  input  logic Rs, // the result's sign
+  input  logic Invalid, Overflow, // flags to choose the result
+  input  logic [P.NE-1:0] Re, // Result exponent
+  input  logic [P.NE+1:0] FullRe, // Result full exponent (msb set selects the underflow result)
+  input  logic [P.NF-1:0] Rf, // Result fraction
+  // divsqrt
+  input  logic DivOp, // is it a divsqrt operation
+  input  logic DivByZero, // divide by zero flag
+  // outputs
+  output logic [P.FLEN-1:0] PostProcRes // final result
+);
+
+  logic [P.FLEN-1:0] XNaNRes; // result when X is NaN: sign 0, exponent all ones, fraction msb 1, remaining fraction from Xm
+  logic [P.FLEN-1:0] YNaNRes; // result when Y is NaN: same layout with the fraction taken from Ym
+  logic [P.FLEN-1:0] InvalidRes; // invalid operation result: sign 0, exponent all ones, fraction msb 1, rest 0
+  logic [P.FLEN-1:0] UfRes; // underflow result: signed zero, with the lsb set when rounding adds one
+  logic [P.FLEN-1:0] OfRes; // overflow result: signed infinity or signed maximum normal number
+  logic [P.FLEN-1:0] NormRes; // normal result {Rs, Re, Rf} trimmed to the output format
+  logic OfResMax; // does the overflow result output the maximum normal fp number
+  logic KillRes; // kill the result for underflow
+  logic SelOfRes; // should the overflow result be selected
+
+  // narrower formats are placed in the low bits and the unused upper FLEN-LENx bits are filled with ones
+  // does the overflow result output the maximum normalized floating point number
+  // output infinity if the input is infinity (or on divide by zero); == binds tighter than & in the terms below
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases
+  if (P.FPSIZES == 1) begin
+    //NaN res selection depending on standard (XNaNRes/YNaNRes are only driven when P.IEEE754 is set)
+    if(P.IEEE754) begin
+      assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+      assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end else begin
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end
+
+    assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+    assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = {Rs, Re, Rf};
+
+  end else if (P.FPSIZES == 2) begin // OutFmt set selects the full-width format, clear selects the LEN1 format
+    if(P.IEEE754) begin
+      assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+      assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end else begin
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end
+
+    always_comb
+      if(OutFmt)
+        if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+        else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      else
+        if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+        else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+    assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin // full-width format
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        P.FMT1: begin // LEN1/NE1/NF1 format
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+            YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+          UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+        end
+        P.FMT2: begin // LEN2/NE2/NF2 format
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+            YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+          UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
+        end
+        default: begin // unused format encoding: every candidate result is driven to zero
+          if(P.IEEE754) begin
+            XNaNRes = (P.FLEN)'(0);
+            YNaNRes = (P.FLEN)'(0);
+            InvalidRes = (P.FLEN)'(0);
+          end else begin
+            InvalidRes = (P.FLEN)'(0);
+          end
+          OfRes = (P.FLEN)'(0);
+          UfRes = (P.FLEN)'(0);
+          NormRes = (P.FLEN)'(0);
+        end
+      endcase
+
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        2'h3: begin // full-width format (P.NE/P.NF)
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        2'h1: begin // D_LEN/D_NE/D_NF format
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+            YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+          UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
+        end
+        2'h0: begin // S_LEN/S_NE/S_NF format
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+            YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+          UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
+        end
+        2'h2: begin // H_LEN/H_NE/H_NF format
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+            YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
+          // zero is exact if dividing by infinity so don't add 1 (the same ~(DivOp&YInf) term appears in every UfRes above)
+          UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
+        end
+      endcase
+  end
+
+  // determine if the result should be killed (UfRes selected instead of NormRes)
+  // - do so if the msb of the full exponent is set (FullRe[P.NE+1])
+  // - or, for a divsqrt operation, if Y is infinity while X is not
+  // - or, for a divsqrt operation, if X is zero
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+
+  // calculate if the overflow result should be selected (an infinite Y on a divsqrt operation does not select it)
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+
+  // final select, highest priority first; divide by zero takes the OfRes path, which is infinity with the result sign because OfResMax is forced low
+  if(P.IEEE754)
+    always_comb
+      if(XNaN) PostProcRes = XNaNRes;
+      else if(YNaN) PostProcRes = YNaNRes;
+      else if(Invalid) PostProcRes = InvalidRes;
+      else if(SelOfRes) PostProcRes = OfRes;
+      else if(KillRes) PostProcRes = UfRes;
+      else PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid) PostProcRes = InvalidRes;
+      else if(SelOfRes) PostProcRes = OfRes;
+      else if(KillRes) PostProcRes = UfRes;
+      else PostProcRes = NormRes;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..2385cac20
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,102 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0.
You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// drsu: wrapper that instantiates the divremsqrt unit and feeds its outputs to divremsqrtpostprocess.
+module drsu import cvw::*; #(parameter cvw_t P) (
+  input  logic clk,
+  input  logic reset,
+  input  logic [P.FMTBITS-1:0] FmtE, // format, passed to both instances
+  input  logic XsE, YsE, // operand signs
+  input  logic [P.NF:0] XmE, YmE, // operand significands
+  input  logic [P.NE-1:0] XeE, YeE, // operand exponents
+  input  logic XInfE, YInfE, // operand is infinity
+  input  logic XZeroE, YZeroE, // operand is zero
+  input  logic XNaNE, YNaNE, // operand is NaN
+  input  logic XSNaNE, YSNaNE, // operand is a signaling NaN (postprocess only)
+  input  logic FDivStartE, IDivStartE, // floating-point / integer start requests
+  input  logic StallM,
+  input  logic FlushE,
+  input  logic SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0] Funct3E, Funct3M,
+  input  logic IntDivE, W64E,
+  input  logic [2:0] Frm, // rounding mode (postprocess only)
+  input  logic [3:0] OpCtrl, // passed to postprocess only
+  input  logic [1:0] PostProcSel, // passed to postprocess only
+  output logic FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.FLEN-1:0] FResM, // PostProcRes of the postprocess instance
+  output logic [P.XLEN-1:0] FIntDivResultM, // driven by the divremsqrt instance
+  output logic [4:0] FlgM // PostProcFlg of the postprocess instance
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+  // NOTE: of the locals below only IntDivM, UmM, UmMexact, UeM, DivStickyM, PreResultM, PreIntResultM and IntNormShiftM are connected in this module
+  logic [P.DIVb+3:0] WS, WC; // Partial remainder components
+  logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0] D; // Iterator Divisor
+  logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
+  logic [P.DIVb+1:0] FirstC; // Step tracker
+  logic Firstun; // Quotient selection
+  logic WZeroE; // Early termination flag
+  logic [P.DURLEN-1:0] CyclesE; // FSM cycles
+  logic SpecialCaseM; // Divide by zero, square root of negative, etc.
+  logic DivStartE; // Enable signal for flops during stall
+
+  // Integer div/rem signals
+  logic BZeroM; // Denominator is zero
+  logic IntDivM; // Integer operation
+  logic [P.DIVBLEN:0] nM, mM; // Shift amounts
+  logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
+  logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
+  logic ISpecialCaseE; // Integer div/remainder special cases
+  logic [P.DIVb:0] UmM; // divider result significand from divremsqrt
+  logic [P.NF+2:0] UmMexact; //U1.NF+2
+  logic [P.NE+1:0] UeM; // divider result exponent from divremsqrt
+  logic DivStickyM; // divider sticky bit from divremsqrt
+  logic [P.INTDIVb+3:0] PreResultM; // passed from divremsqrt to postprocess
+  logic [P.XLEN-1:0] PreIntResultM; // passed from divremsqrt to postprocess
+  logic [P.DIVBLEN-1:0] IntNormShiftM; // passed from divremsqrt to postprocess
+
+  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
+    .XeE, .YeE, .SqrtE, .SqrtM,
+    .XInfE, .YInfE, .XZeroE, .YZeroE,
+    .XNaNE, .YNaNE,
+    .FDivStartE, .IDivStartE, .W64E,
+    .StallM, .DivStickyM, .FDivBusyE, .UeM,
+    .UmM,
+    .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
+    .Funct3E, .IntDivE, .FIntDivResultM, .IntDivM,
+    .FDivDoneE, .IFDivStartE, .IntNormShiftM, .PreIntResultM, .PreResultM);
+  assign UmMexact = UmM[P.DIVb:P.DIVb-(P.NF+3-1)]; // grabbing top 1+(NF+2) msbs
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl, .IntDivM, // NOTE(review): E-suffixed operand signals are combined with M-suffixed divider outputs here - confirm the operands are still valid when the result is used
+    .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
+    .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
+    .PreIntResultM, .PreResultM, .IntNormShiftM);
+endmodule
+
diff --git a/src/fpu/divremsqrt/intrightshift.sv b/src/fpu/divremsqrt/intrightshift.sv
new file mode 100644
index 000000000..dd4f47aeb
--- /dev/null
+++ b/src/fpu/divremsqrt/intrightshift.sv
@@ -0,0 +1,37 @@
+///////////////////////////////////////////
+// intrightshift.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
+// Modified:13 January 2022
+//
+//
Purpose: Variable-distance logical right shifter for the divremsqrt unit
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module intrightshift import cvw::*;  #(parameter cvw_t P) (
+  input  logic signed [P.INTDIVb+3:0] shiftin,   // value to be shifted
+  input  logic        [P.DIVBLEN-1:0] shiftamt,  // number of bit positions to shift right
+  output logic signed [P.INTDIVb+3:0] shifted    // shiftin moved right by shiftamt
+);
+  // `>>` is the logical shift operator: vacated upper bits are zero-filled even though the ports are signed
+  always_comb shifted = shiftin >> shiftamt;
+
+endmodule
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
new file mode 100644
index 000000000..9ca2e5b61
--- /dev/null
+++ b/testbench/testbench-fp.sv
@@ -0,0 +1,1682 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com, james.stine@okstate.edu
+//
+// Purpose: Testbench for UCB Testfloat on Wally
+//
+// A component of the Wally configurable RISC-V project.
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "config.vh" +`include "tests-fp.vh" + +import cvw::*; + +module testbenchfp; + // Two parameters TEST, TEST_SIZE used with testfloat.do in sim dir + // to run specific precisions (e.g., quad or all) + parameter TEST="none"; + parameter TEST_SIZE="none"; + + `include "parameter-defs.vh" + + //parameter MAXVECTORS = 8388610; + parameter MAXVECTORS = 100000; + + // FIXME: needs cleaning of unused variables (jes) + string Tests[]; // list of tests to be run + logic [3:0] OpCtrl[]; // list of op controls + logic [2:0] Unit[]; // list of units being tested + logic WriteInt[]; // Is being written to integer resgiter + logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 + //logic [2:0] Frm[4:0] = {3'b011, 3'b011, 3'b011, 3'b011, 3'b011}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100 *** MODIFIED ROUNDING MODES + logic [1:0] Fmt[]; // list of formats for the other units + + logic clk=0; + logic [31:0] TestNum=0; // index for the test + logic [31:0] OpCtrlNum=0; // index for OpCtrl + logic [31:0] errors=0; // how many errors + logic [31:0] 
VectorNum=0; // index for test vector + logic [31:0] FrmNum=0; // index for rounding mode + logic [P.Q_LEN*4+7:0] TestVectors[MAXVECTORS:0]; // list of test vectors + + logic [1:0] FmtVal; // value of the current Fmt + logic [2:0] UnitVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal + logic [3:0] OpCtrlVal; + logic WriteIntVal; // value of the current WriteInt + logic [P.FLEN-1:0] X, Y, Z; // inputs read from TestFloat + logic [P.FLEN-1:0] XPostBox; // inputs read from TestFloat + logic [P.XLEN-1:0] SrcA, SrcB; // integer input + logic W64; // is W64 instruction + logic [P.FLEN-1:0] Ans; // correct answer from TestFloat + logic [P.FLEN-1:0] Res; // result from other units + logic [4:0] AnsFlg; // correct flags read from testfloat + logic [4:0] ResFlg, Flg; // Result flags + logic [P.FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad + logic [P.FLEN-1:0] FpRes, FpCmpRes; // Results from each unit + logic [P.XLEN-1:0] IntRes, CmpRes; // Results from each unit + logic [4:0] FmaFlg, CvtFlg, DivFlg; // Outputed flags + logic [4:0] CmpFlg; // Outputed flags + logic AnsNaN, ResNaN, NaNGood; + logic Xs, Ys, Zs; // sign of the inputs + logic [P.NE-1:0] Xe, Ye, Ze; // exponent of the inputs + logic [P.NF:0] Xm, Ym, Zm; // mantissas of the inputs + logic XNaN, YNaN, ZNaN; // is the input NaN + logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN + logic XSubnorm, ZSubnorm; // is the input denormalized + logic XInf, YInf, ZInf; // is the input infinity + logic XZero, YZero, ZZero; // is the input zero + logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones + logic [P.CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic IntZero; + logic CvtResSgnE; + logic [P.NE:0] CvtCalcExpE; // the calculated exponent + logic [P.LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [P.DIVb:0] Quot; + logic CvtResSubnormUfE; + logic DivStart=0; + logic FDivBusyE; + logic OldFDivBusyE; + logic 
reset = 1'b0; + logic [$clog2(P.NF+2)-1:0] XZeroCnt, YZeroCnt; + + // in-between FMA signals + logic Mult; + logic Ss; + logic [P.NE+1:0] Pe; + logic [P.NE+1:0] Se; + logic ASticky; + logic KillProd; + logic [$clog2(3*P.NF+5)-1:0] SCnt; + logic [3*P.NF+3:0] Sm; + logic InvA; + logic NegSum; + logic As; + logic Ps; + logic DivSticky; + logic DivDone; + logic DivNegSticky; + logic [P.NE+1:0] DivCalcExp; + logic divsqrtop; + + // Missing logic vectors fdivsqrt + logic [2:0] Funct3E; + logic [2:0] Funct3M; + logic FlushE; + logic IFDivStartE; + logic IDivStart; + logic FDivDoneE; + logic [P.NE+1:0] UeM; + logic [P.DIVb:0] UmM; + logic [P.XLEN-1:0] FIntDivResultM; + logic ResMatch; // Check if result match + logic FlagMatch; // Check if IEEE flags match + logic CheckNow; // Final check + logic FMAop; // Is this a FMA operation? + logic IntDivE; // Is Integer operation on FPU? + + // FSM for testing each item per clock + typedef enum logic [2:0] {S0, Start, S2, Done} statetype; + statetype state, nextstate; + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||||| |||||||| ||||||| ||||||||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||| ||||||| ||| ||||||| |||||||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||| ||||||| ||| ||||||| |||||||| ||||||||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // select tests relevent to the specified configuration + // cvtint - test integer conversion unit (fcvtint) + // cvtfp - test floating-point conversion unit (fcvtfp) + // cmp - test comparison unit's LT, LE, EQ opperations (fcmp) + // add - test addition + // sub - test subtraction + // div - test division + // sqrt - test square root + // all - test all of the above + flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M)); + + initial begin + // Information displayed for user on what is simulating + // 
$display("\nThe start of simulation..."); + $display("\nThe start of simulation... INTDIVb: %d, DIVB: %d, DIVBLEN: %d , RK: %d",INTDIVb, DIVb, DIVBLEN, RK); + // $display("This simulation for TEST is %s", TEST); + if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported + if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion + // add the 128-bit cvtint tests to the to-be-tested list + Tests = {Tests, f128rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported add their conversions + Tests = {Tests, f128rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + // if the floating-point conversions are being tested + if (TEST === "cvtfp" | TEST === "all") begin + if (P.D_SUPPORTED) begin // if double precision is supported + // add the 128 <-> 64 bit conversions to the to-be-tested list + Tests = {Tests, f128f64cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b01, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (P.F_SUPPORTED) begin // if single precision is supported + // add the 128 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f128f32cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (P.ZFH_SUPPORTED) begin // if half precision is supported + // add the 128 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f128f16cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b11}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested + // add the compare tests/op-ctrls/unit/fmt + Tests = {Tests, f128cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the addition tests/op-ctrls/unit/fmt + Tests = {Tests, f128add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the subtraction tests/op-ctrls/unit/fmt + Tests = {Tests, f128sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the multiply tests/op-ctrls/unit/fmt + Tests = {Tests, f128mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if 
square-root is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested + Tests = {Tests, f128fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported + if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f64rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + if (P.XLEN == 64) begin // if 64-bit integers are being supported + Tests = {Tests, f64rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested + if (P.F_SUPPORTED) begin // if single precision is supported + // add the 64 <-> 32 bit conversions to the to-be-tested list + Tests = {Tests, f64f32cvt}; + // add the op-ctrls (i.e. 
the format of the result) + OpCtrl = {OpCtrl, 3'b00, 3'b01}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (P.ZFH_SUPPORTED) begin // if half precision is supported + // add the 64 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f64f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b01}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct 
tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested + Tests = {Tests, f64fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + Tests = {Tests, f32rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f32rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = 
{WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested + if (P.ZFH_SUPPORTED) begin + // add the 32 <-> 16 bit conversions to the to-be-tested list + Tests = {Tests, f32f16cvt}; + // add the op-ctrls (i.e. the format of the result) + OpCtrl = {OpCtrl, 3'b10, 3'b00}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + // add the unit being tested and fmt (input format) + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; + end + for(int i = 0; i<5; i++) begin + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiply is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32mul}; + OpCtrl = {OpCtrl, 
`MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + Tests = {Tests, f32fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + end + if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported + if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested + Tests = {Tests, f16rv32cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (P.XLEN == 64) begin // if 64-bit integers are supported + Tests = {Tests, f16rv64cvtint}; + // add the op-codes for these tests to the op-code list + OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; + // add what unit is used and the fmt to their lists (one for each test) + for(int i = 
0; i<20; i++) begin + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16cmp}; + OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0, 1'b0}; + for(int i = 0; i<15; i++) begin + Unit = {Unit, `CMPUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16add}; + OpCtrl = {OpCtrl, `ADD_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sub}; + OpCtrl = {OpCtrl, `SUB_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16mul}; + OpCtrl = {OpCtrl, `MUL_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; 
i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested + Tests = {Tests, f16fma}; + OpCtrl = {OpCtrl, `FMA_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `FMAUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (P.IDIV_ON_FPU |1'b1) begin + if (P.Q_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f128sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end + end + if (P.D_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f64sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end + end + if (P.S_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, 
`DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f32sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end + + end + if (P.ZFH_SUPPORTED) begin + if (TEST === "fdivremsqrt" | TEST === "div_drsu") begin // if division on drsu is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + if (TEST === "fdivremsqrt" | TEST === "sqrt_drsu") begin // if square-root on drsu is being tested + // add the square-root tests/op-ctrls/unit/fmt + Tests = {Tests, f16sqrt}; + OpCtrl = {OpCtrl, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + if (P.XLEN == 64 & P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int64rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int64div}; + OpCtrl = {OpCtrl, `INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int64remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = 
{WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int64divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer remainder is being tested + Tests = {Tests, int64remw}; + OpCtrl = {OpCtrl, `INTREMW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer remainder is being tested + Tests = {Tests, int64remuw}; + OpCtrl = {OpCtrl, `INTREMUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if w-type integer division is being tested + Tests = {Tests, int64divw}; + OpCtrl = {OpCtrl, `INTDIVW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivuw"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned w-type integer divison is being tested + Tests = {Tests, int64divuw}; + OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + // RV32 + else if (P.IDIV_ON_FPU) begin + if (TEST === "intrem" | TEST === "intdivrem" | TEST === "fdivremsqrt") begin // if integer remainder is being tested + Tests = {Tests, int32rem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if integer division is being tested + Tests = {Tests, int32div}; + OpCtrl = {OpCtrl, 
`INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer remainder is being tested + Tests = {Tests, int32remu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem" | TEST === "fdivremsqrt") begin // if unsigned integer division is being tested + Tests = {Tests, int32divu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b1}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end + end + // check if nothing is being tested + + $display("This simulation for TEST contains %d vectors", Tests.size); + if (Tests.size() == 0) begin + $display("TEST %s not supported in this configuration", TEST); + $stop; + end + end + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||||| |||||||| ||||||||| ||||||| ||||||||| |||||||| ||||||| ||||||||| + // ||| ||| ||| ||| ||| || || ||| ||| ||| ||| + // |||||||| |||||||| ||||||||| || || ||| |||||||| ||||||| ||| + // ||| || ||| ||| ||| || || ||| ||| ||| ||| + // ||| ||| |||||||| ||| ||| ||||||| ||| |||||||| ||||||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // Read the first test + initial begin + //string testname = {`PATH, Tests[TestNum]}; + static string pp = `PATH; + string testname; + string tt0; + tt0 = $psprintf("%s", Tests[TestNum]); + testname = {pp, tt0}; + //$display("Here you are %s", testname); + $display("\n\nRunning %s vectors ", Tests[TestNum]); + $readmemh(testname, TestVectors); + + // set the test index to 0 + TestNum = 0; + end + + // set a the signals for all tests + always_comb UnitVal = Unit[TestNum]; + always_comb FmtVal = Fmt[TestNum]; + always_comb OpCtrlVal = OpCtrl[OpCtrlNum]; + always_comb 
WriteIntVal = WriteInt[OpCtrlNum]; + always_comb FrmVal = Frm[FrmNum]; + + // modify the format signal if only 2 percisions supported + // - 1 for the larger precision + // - 0 for the smaller precision + always_comb begin + if (P.FMTBITS == 1) ModFmt = FmtVal == P.FMT; + else ModFmt = FmtVal; + end + + // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector + readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), + .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB, + .Xs, .Ys, .Zs, .Unit(UnitVal), + .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), + .Xm, .Ym, .Zm, + .XNaN, .YNaN, .ZNaN, + .XSNaN, .YSNaN, .ZSNaN, + .XSubnorm, .ZSubnorm, + .XZero, .YZero, .ZZero, + .XInf, .YInf, .ZInf, .XExpMax, .Funct3E, .W64, + .X, .Y, .Z, .XPostBox); + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| ||| ||| ||| + // ||||||| ||||||||| ||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // instantiate devices under test + if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma + fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), + .Xe(Xe), .Ye(Ye), .Ze(Ze), + .Xm(Xm), .Ym(Ym), .Zm(Zm), + .XZero, .YZero, .ZZero, .Ss, .Se, + .OpCtrl(OpCtrlVal[2:0]), .Sm, .InvA, .SCnt, .As, .Ps, + .ASticky); + end + + if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt + fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .OpCtrl(OpCtrlVal[2:0]), .IntZero, + .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), + .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); + end + + if (TEST === "cmp" | TEST === "all") begin: fcmp + fcmp #(P) fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal[2:0]), .Xs, .Ys, .Xe, .Ye, + .Xm, .Ym, 
.XZero, .YZero, .CmpIntRes(CmpRes), + .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); + end + + if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt + fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), + .XNaNE(XNaN), .YNaNE(YNaN), + .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0), + .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp), + .UmM(Quot), + .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M), + .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE)); + end + if (TEST === "fdivremsqrt" | TEST === "div_drsu" | TEST === "sqrt_drsu" | TEST === "intdivrem" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" ) begin: divremsqrt + drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]), + .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), + .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64), + .StallM(1'b0), .FDivBusyE, + .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M), + .Funct3E(Funct3E), .IntDivE(IntDivE), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg)); + end + else begin: postprocess + postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .OpCtrl(OpCtrlVal[2:0]), .DivUm(Quot), .DivUe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), 
.FmaSs(Ss), + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), + .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE), + .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), + .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, + .FmaASticky(ASticky), .FmaSe(Se), + .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); + end + + assign CmpFlg[3:0] = 0; + + // produce clock + always begin + clk = 1; #5; clk = 0; #5; + end + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||| ||| |||||||||| ||||| ||| + // ||||||| ||| ||| ||| ||||||| ||| + // |||| ||| ||| |||||||||| |||| ||| ||| + // |||| ||| ||| ||| ||| |||| ||| ||| + // |||| ||| ||| ||| ||| |||| ||| ||| + // |||| |||||| ||| ||| |||| |||||| + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // Check if the correct answer and result is a NaN + always_comb begin + if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin + // an integer output can't be a NaN + AnsNaN = 1'b0; + ResNaN = 1'b0; + end + else if (UnitVal === `CVTFPUNIT) begin + case (OpCtrlVal[1:0]) + 2'b11: begin // quad + AnsNaN = &Ans[P.Q_LEN-2:P.NF]&(|Ans[P.Q_NF-1:0]); + ResNaN = &Res[P.Q_LEN-2:P.NF]&(|Res[P.Q_NF-1:0]); + end + 2'b01: begin // double + AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]); + ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]); + end + 2'b00: begin // single + AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]); + ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]); + end + 2'b10: begin // half + AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); + ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); + end + endcase + end + else begin + case (FmtVal) + 2'b11: begin // quad + AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]); + ResNaN = 
&Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]); + end + 2'b01: begin // double + AnsNaN = &Ans[P.D_LEN-2:P.D_NF]&(|Ans[P.D_NF-1:0]); + ResNaN = &Res[P.D_LEN-2:P.D_NF]&(|Res[P.D_NF-1:0]); + end + 2'b00: begin // single + AnsNaN = &Ans[P.S_LEN-2:P.S_NF]&(|Ans[P.S_NF-1:0]); + ResNaN = &Res[P.S_LEN-2:P.S_NF]&(|Res[P.S_NF-1:0]); + end + 2'b10: begin // half + AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); + ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); + end + endcase + end + end + + always_comb begin + // select the result to check + case (UnitVal) + `FMAUNIT: Res = FpRes; + `DIVUNIT: Res = FpRes; + `CMPUNIT: Res = CmpRes; + `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; + `CVTFPUNIT: Res = FpRes; + `INTDIVUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; + endcase + + // select the flag to check + case (UnitVal) + `FMAUNIT: ResFlg = Flg; + `DIVUNIT: ResFlg = Flg; + `CMPUNIT: ResFlg = CmpFlg; + `CVTINTUNIT: ResFlg = Flg; + `CVTFPUNIT: ResFlg = Flg; + `INTDIVUNIT: ResFlg = Flg; + endcase + + // Use four state test sequence to handle div properly. + // Four states should allow other operations to finish + // properly and within time. 
+ case (state) + S0: begin + DivStart = 1'b0; + nextstate = Start; + end + Start: begin + if (UnitVal == `DIVUNIT | (UnitVal == `INTDIVUNIT & (OpCtrlVal == `SQRT_OPCTRL | OpCtrlVal == `DIV_OPCTRL))) begin + DivStart = 1'b1; + IntDivE = 1'b0; + end + else if (UnitVal == `INTDIVUNIT) begin + IDivStart = 1'b1; + IntDivE = 1'b1; + end + else + DivStart = 1'b0; + nextstate = S2; + end + S2: begin + DivStart = 1'b0; + IDivStart = 1'b0; + if ((FDivBusyE|~DivDone)&(UnitVal == `DIVUNIT | UnitVal == `INTDIVUNIT)) + nextstate = S2; + else + nextstate = Done; + end + Done: begin + DivStart = 1'b0; + IDivStart = 1'b0; + IntDivE = 1'b0; + nextstate = S0; + end + endcase // case (state) + + end + + // Provide reset for divsqrt to reset state + initial + begin + #0 reset = 1'b1; + #25 reset = 1'b0; + end + + // Left-over from before - will remove soon + always @(posedge clk) + OldFDivBusyE = FDivDoneE; + + // state machine to handle timing for testing due + // various cycle counts for different fp/int operations + // Adds vector at start of clock + always @(posedge clk) begin + + // state machine element for testing + if (reset) + state <= S0; + else + state <= nextstate; + + // Increment the vector when Done with each test + if (state == Done) + VectorNum += 1; // increment the vector + + end + + // check results on falling edge of clk + always @(negedge clk) begin + // check if the NaN value is good. 
IEEE754-2019 sections 6.3 and 6.2.3 specify: + // - the sign of the NaN does not matter for the opperations being tested + // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter + if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) + case (FmtVal) + 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | + (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]})) | + (ZNaN&(Res[P.Q_LEN-2:0] === {Z[P.Q_LEN-2:P.Q_NF],1'b1,Z[P.Q_NF-2:0]}))); + 2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | + (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]})) | + (ZNaN&(Res[P.D_LEN-2:0] === {Z[P.D_LEN-2:P.D_NF],1'b1,Z[P.D_NF-2:0]}))); + 2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | + (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]})) | + (ZNaN&(Res[P.S_LEN-2:0] === {Z[P.S_LEN-2:P.S_NF],1'b1,Z[P.S_NF-2:0]}))); + 2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | + (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) | + (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]}))); + endcase + else if (UnitVal === `CVTFPUNIT) // if converting from FP to FP OpCtrl 
contains the final FP format + case (OpCtrlVal[1:0]) + 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | + (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | + (YNaN&(Res[P.Q_LEN-2:0] === {Y[P.Q_LEN-2:P.Q_NF],1'b1,Y[P.Q_NF-2:0]}))); + 2'b01: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.D_LEN-1:0] === {1'b0, {P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.D_LEN-2:0] === {{P.D_NE+1{1'b1}}, {P.D_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.D_LEN-2:0] === Ans[P.D_LEN-2:0])) | + (XNaN&(Res[P.D_LEN-2:0] === {X[P.D_LEN-2:P.D_NF],1'b1,X[P.D_NF-2:0]})) | + (YNaN&(Res[P.D_LEN-2:0] === {Y[P.D_LEN-2:P.D_NF],1'b1,Y[P.D_NF-2:0]}))); + 2'b00: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.S_LEN-1:0] === {1'b0, {P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.S_LEN-2:0] === {{P.S_NE+1{1'b1}}, {P.S_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.S_LEN-2:0] === Ans[P.S_LEN-2:0])) | + (XNaN&(Res[P.S_LEN-2:0] === {X[P.S_LEN-2:P.S_NF],1'b1,X[P.S_NF-2:0]})) | + (YNaN&(Res[P.S_LEN-2:0] === {Y[P.S_LEN-2:P.S_NF],1'b1,Y[P.S_NF-2:0]}))); + 2'b10: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res[P.H_LEN-1:0] === {1'b0, {P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsFlg[4]&(Res[P.H_LEN-2:0] === {{P.H_NE+1{1'b1}}, {P.H_NF-1{1'b0}}})) | + (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | + (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | + (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]}))); + endcase + else NaNGood = 1'b0; // integers can't be NaNs + + + /////////////////////////////////////////////////////////////////////////////////////////////// + + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + // ||| ||| ||| ||| ||| ||| ||| + // ||| |||||||||| ||||||| ||| |||||| + // ||| ||| ||| ||| ||| ||| ||| + // ||||||| ||| ||| ||||||| ||||||| ||| ||| + + 
/////////////////////////////////////////////////////////////////////////////////////////////// + + // check if result is correct + assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); + assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); + assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal ==`INTDIVU_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTREMW_OPCTRL) | (OpCtrlVal == `INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) ; + assign FMAop = (OpCtrlVal == `FMAUNIT); + assign DivDone = OldFDivBusyE & ~FDivBusyE; + //assign DivDone = ~FDivBusyE; + //assign DivDone = FDivDoneE; + assign CheckNow = ((DivDone | ~divsqrtop) | + (TEST == "add" | TEST == "fma" | TEST == "sub") | + ((TEST == "all") & (DivDone | ~divsqrtop))); + + if (~(ResMatch & FlagMatch) & CheckNow & (Ans[0] !== 1'bx)) begin + errors += 1; + $display("\nError in %s", Tests[TestNum]); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $stop; + end + + if (TestVectors[VectorNum][100:0] === 101'bx & Tests[TestNum] !== "" ) begin // if reached the eof + // increment the test + TestNum += 1; + // clear the vectors + for(int i=0; i quad + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.Q_LEN+P.XLEN-1:8+(P.Q_LEN)]; + Ans = TestVector[8+(P.Q_LEN-1):8]; + end + 2'b10: begin // int -> quad + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.Q_LEN+32-1]}}, TestVector[8+P.Q_LEN+32-1:8+(P.Q_LEN)]}; + Ans = TestVector[8+(P.Q_LEN-1):8]; + end + 2'b01: begin // quad -> long + X = {TestVector[8+P.XLEN+P.Q_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // quad -> int + X = 
{TestVector[8+32+P.Q_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b01: if (P.D_SUPPORTED) begin // double + // {Int->Fp?, is the integer a long} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> double + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.D_LEN+P.XLEN-1:8+(P.D_LEN)]; + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b10: begin // int -> double + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.D_LEN+32-1]}}, TestVector[8+P.D_LEN+32-1:8+(P.D_LEN)]}; + Ans = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+(P.D_LEN-1):8]}; + end + 2'b01: begin // double -> long + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+P.XLEN+P.D_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // double -> int + X = {{P.FLEN-P.D_LEN{1'b1}}, TestVector[8+32+P.D_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b00: if (P.S_SUPPORTED) begin // single + // {is the integer a long, is the opperation to an integer} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> single + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.S_LEN+P.XLEN-1:8+(P.S_LEN)]; + Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; + end + 2'b10: begin // int -> single + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.S_LEN+32-1]}}, TestVector[8+P.S_LEN+32-1:8+(P.S_LEN)]}; + Ans = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+(P.S_LEN-1):8]}; + end + 2'b01: begin // single -> long + X = {{P.FLEN-P.S_LEN{1'b1}}, TestVector[8+P.XLEN+P.S_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // single -> int + X = {{P.FLEN-P.S_LEN{1'b1}}, 
TestVector[8+32+P.S_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]}; + end + endcase + end + 2'b10: begin // half + // {is the integer a long, is the opperation to an integer} + casex ({OpCtrl[2:1]}) + 2'b11: begin // long -> half + X = {P.FLEN{1'bx}}; + SrcA = TestVector[8+P.H_LEN+P.XLEN-1:8+(P.H_LEN)]; + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b10: begin // int -> half + // correctly sign extend the integer depending on if it's a signed/unsigned test + X = {P.FLEN{1'bx}}; + SrcA = {{P.XLEN-32{TestVector[8+P.H_LEN+32-1]}}, TestVector[8+P.H_LEN+32-1:8+(P.H_LEN)]}; + Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; + end + 2'b01: begin // half -> long + X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+P.XLEN+P.H_LEN-1:8+(P.XLEN)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {TestVector[8+(P.XLEN-1):8]}; + end + 2'b00: begin // half -> int + X = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+32+P.H_LEN-1:8+(32)]}; + SrcA = {P.XLEN{1'bx}}; + Ans = {{P.XLEN-32{TestVector[8+32-1]}}, TestVector[8+(32-1):8]}; + end + endcase + end + endcase + endcase + end + + assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)|((Unit == `DIVUNIT)&OpCtrl[0]) | ((Unit == `INTDIVUNIT) & OpCtrl === `SQRT_OPCTRL)); + assign ZEn = (Unit == `FMAUNIT); + assign FPUActive = 1'b1; + + unpack #(P) unpack(.X, .Y, .Z, .Fmt(ModFmt), .FPUActive, .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, + .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, + .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, + .XEn, .YEn, .ZEn, .XExpMax, .XPostBox); + +endmodule diff --git a/tests/fp/combined_IF_vectors/create_IF_vectors.sh b/tests/fp/combined_IF_vectors/create_IF_vectors.sh index 707b2d5f4..7fe5897fb 100755 --- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh +++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh @@ -1,5 +1,7 @@ #!/bin/sh # create test vectors for stand alone int 
+mkdir -p IF_vectors ./extract_testfloat_vectors.py ./extract_arch_vectors.py +cp IF_vectors/* ../vectors