mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
began porting over divremsqrt
This commit is contained in:
parent
0ce289c937
commit
fda6305d1c
561
bin/regression-wally-intdiv
Executable file
561
bin/regression-wally-intdiv
Executable file
@ -0,0 +1,561 @@
|
||||
#!/usr/bin/python3
|
||||
##################################
|
||||
#
|
||||
# regression-wally
|
||||
# David_Harris@Hmc.edu 25 January 2021
|
||||
# Modified by Jarred Allen <jaallen@g.hmc.edu>
|
||||
#
|
||||
# Run a regression with multiple configurations in parallel and exit with
|
||||
# non-zero status code if an error happened, as well as printing human-readable
|
||||
# output.
|
||||
#
|
||||
##################################
|
||||
import sys,os,shutil
|
||||
import multiprocessing
|
||||
|
||||
|
||||
|
||||
class bcolors:
    """ANSI escape sequences used to colorize the regression's terminal output."""
    # Each value is a terminal control sequence; ENDC resets formatting.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
|
||||
|
||||
from collections import namedtuple
|
||||
regressionDir = os.path.dirname(os.path.abspath(__file__))
|
||||
os.chdir(regressionDir)
|
||||
|
||||
coverage = '-coverage' in sys.argv
|
||||
fp = '-fp' in sys.argv
|
||||
nightly = '-nightly' in sys.argv
|
||||
softfloat = '-softfloat' in sys.argv
|
||||
intdiv = '-intdiv' in sys.argv
|
||||
|
||||
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
|
||||
# name: the name of this test configuration (used in printing human-readable
|
||||
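#         output and picking logfile names)
# variant: the configuration the test runs under (joined with name to form
#         the logfile name and the label printed in pass/fail messages)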
|
||||
# cmd: the command to run to test (should include the logfile as '{}', and
|
||||
# the command needs to write to that file)
|
||||
# grepstr: the string to grep through the log file for. The test succeeds iff
|
||||
# grep finds that string in the logfile (is used by grep, so it may
|
||||
# be any pattern grep accepts, see `man 1 grep` for more info).
|
||||
|
||||
# edit this list to add more test cases
|
||||
if (nightly):
|
||||
nightMode = "-nightly";
|
||||
configs = []
|
||||
else:
|
||||
nightMode = "";
|
||||
configs = [
|
||||
TestCase(
|
||||
name="lints",
|
||||
variant="all",
|
||||
cmd="./lint-wally " + nightMode + " | tee {}",
|
||||
grepstr="lints run with no errors or warnings"
|
||||
)
|
||||
]
|
||||
|
||||
def getBuildrootTC(boot):
    """Build the TestCase for a buildroot simulation.

    boot: when truthy, produce the "buildrootboot" case, which runs wally.do
          and passes when the log contains the login prompt.  Otherwise
          produce the "buildroot" case, which runs wally-batch.do for
          INSTR_LIMIT instructions (with -coverage when the module-level
          `coverage` flag is set) and passes when the log reports that
          instruction count.

    Returns a TestCase whose variant is "rv64gc".  The cmd keeps a '{}'
    placeholder for the logfile, which is filled in when the test is run.
    """
    INSTR_LIMIT = 1000000 # multiple of 100000; 4M is interesting because it gets into the kernel and enabling VM
    # *** TODO: replace the instruction-count check with a search for the login prompt
    #     (an expected bound of 246000000 instructions was recorded here but never used).
    if boot:
        name = "buildrootboot"
        BRcmd = "vsim > {} -c <<!\ndo wally.do buildroot buildroot-no-trace $RISCV 0 1 0\n!"
        BRgrepstr = "WallyHostname login:"
    else:
        name = "buildroot"
        if coverage:
            print( "buildroot coverage")
            BRcmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot $RISCV "+str(INSTR_LIMIT)+" 1 0 -coverage\n!"
        else:
            print( "buildroot no coverage")
            BRcmd = "vsim > {} -c <<!\ndo wally-batch.do buildroot buildroot configOptions -GINSTR_LIMIT=" +str(INSTR_LIMIT) + " \n!"
        # Both non-boot flavors succeed once the simulator reports the instruction limit
        BRgrepstr = str(INSTR_LIMIT)+" instructions"
    return TestCase(name, variant="rv64gc", cmd=BRcmd, grepstr=BRgrepstr)
|
||||
|
||||
tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused
|
||||
|
||||
tests64i = ["arch64i"]
|
||||
for test in tests64i:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64i",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64i "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused
|
||||
tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32f_divsqrt", "arch32d_divsqrt",
|
||||
"arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zicond",
|
||||
"arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zfh", "arch32zfh_fma",
|
||||
"arch32zfh_divsqrt", "arch32zfaf", "wally32a", "wally32priv", "wally32periph",
|
||||
"arch32zbkb", "arch32zbkc", "arch32zbkx", "arch32zknd", "arch32zkne", "arch32zknh"] # "arch32zbc", "arch32zfad",
|
||||
#tests32gc = ["arch32f", "arch32d", "arch32f_fma", "arch32d_fma", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32a", "arch32zifencei", "arch32zba", "arch32zbb", "arch32zbc", "arch32zbs", "arch32zicboz", "arch32zcb", "wally32a", "wally32priv", "wally32periph"]
|
||||
for test in tests32gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32gc "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32imcimperas = ["imperas32i", "imperas32c"] # unused
|
||||
tests32imc = ["arch32i", "arch32c", "arch32m", "wally32periph"]
|
||||
for test in tests32imc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32imc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32imc "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32i = ["arch32i"]
|
||||
for test in tests32i:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32i",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32i "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
|
||||
tests32e = ["arch32e"]
|
||||
for test in tests32e:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv32e",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv32e "+test+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64f_divsqrt", "arch64d_divsqrt", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "arch64zfh", "arch64zfh_divsqrt", "arch64zfh_fma", "arch64zfaf", "arch64zfad", "arch64zbkb", "arch64zbkc", "arch64zbkx", "arch64zknd", "arch64zkne", "arch64zknh",
|
||||
"arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "arch64zicond", "wally64a", "wally64periph", "wally64priv"] # add arch64zfh_fma when available; arch64zicobz, arch64zcb when working
|
||||
#tests64gc = ["arch64f", "arch64d", "arch64f_fma", "arch64d_fma", "arch64i", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs",
|
||||
# "arch64priv", "arch64c", "arch64m", "arch64a", "arch64zifencei", "wally64a", "wally64periph", "wally64priv", "arch64zicboz", "arch64zcb"]
|
||||
if (coverage): # delete all but 64gc tests when running coverage
|
||||
configs = []
|
||||
tests64gc = ["coverage64gc", "arch64i", "arch64priv", "arch64c", "arch64m",
|
||||
"arch64zifencei", "arch64zicond", "arch64a", "wally64a", "wally64periph", "wally64priv",
|
||||
"arch64zba", "arch64zbb", "arch64zbc", "arch64zbs"] # add when working: "arch64zcb", "arch64zicboz"
|
||||
if (fp):
|
||||
tests64gc.append("arch64f")
|
||||
tests64gc.append("arch64d")
|
||||
tests64gc.append("arch64zfh")
|
||||
tests64gc.append("arch64f_fma")
|
||||
tests64gc.append("arch64d_fma")
|
||||
tests64gc.append("arch64zfh_fma")
|
||||
tests64gc.append("arch64f_divsqrt")
|
||||
tests64gc.append("arch64d_divsqrt")
|
||||
tests64gc.append("arch64zfh_divsqrt")
|
||||
tests64gc.append("arch64zfaf")
|
||||
tests64gc.append("arch64zfad")
|
||||
coverStr = '-coverage'
|
||||
else:
|
||||
coverStr = ''
|
||||
for test in tests64gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
variant="rv64gc",
|
||||
cmd="vsim > {} -c <<!\ndo wally-batch.do rv64gc "+test+" " + coverStr + "\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
# run derivative configurations if requested
|
||||
if (nightly):
|
||||
derivconfigtests = [
|
||||
["tlb2_rv32gc", ["wally32priv"]],
|
||||
["tlb16_rv32gc", ["wally32priv"]],
|
||||
["tlb2_rv64gc", ["wally64priv"]],
|
||||
["tlb16_rv64gc", ["wally64priv"]],
|
||||
["way_1_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_2_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_8_4096_512_rv32gc", ["arch32i"]],
|
||||
["way_4_2048_512_rv32gc", ["arch32i"]],
|
||||
["way_4_4096_256_rv32gc", ["arch32i"]],
|
||||
["way_1_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_2_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_8_4096_512_rv64gc", ["arch64i"]],
|
||||
["way_4_2048_512_rv64gc", ["arch64i"]],
|
||||
["way_4_4096_256_rv64gc", ["arch64i"]],
|
||||
["way_4_4096_1024_rv64gc", ["arch64i"]],
|
||||
|
||||
["ram_0_0_rv64gc", ["ahb64"]],
|
||||
["ram_1_0_rv64gc", ["ahb64"]],
|
||||
["ram_1_1_rv64gc", ["ahb64"]],
|
||||
["ram_2_0_rv64gc", ["ahb64"]],
|
||||
["ram_2_1_rv64gc", ["ahb64"]],
|
||||
|
||||
["noicache_rv32gc", ["ahb32"]],
|
||||
# cacheless designs will not work until DTIM supports FLEN > XLEN
|
||||
# ["nodcache_rv32gc", ["ahb32"]],
|
||||
# ["nocache_rv32gc", ["ahb32"]],
|
||||
["noicache_rv64gc", ["ahb64"]],
|
||||
["nodcache_rv64gc", ["ahb64"]],
|
||||
["nocache_rv64gc", ["ahb64"]],
|
||||
|
||||
### add misaligned tests
|
||||
|
||||
["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]],
|
||||
["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
|
||||
### branch predictor simulation
|
||||
|
||||
# ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# # btb
|
||||
# ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# # ras
|
||||
# ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
# ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"],
|
||||
|
||||
# enable floating-point tests when lint is fixed
|
||||
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
|
||||
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
|
||||
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
|
||||
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
|
||||
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed
|
||||
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
|
||||
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
|
||||
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
|
||||
|
||||
|
||||
]
|
||||
for test in derivconfigtests:
|
||||
config = test[0];
|
||||
tests = test[1];
|
||||
if(len(test) >= 4 and test[2] == "configOptions"):
|
||||
configOptions = test[3]
|
||||
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
|
||||
else:
|
||||
configOptions = ""
|
||||
cmdPrefix = "vsim > {} -c <<!\ndo wally-batch.do "+config
|
||||
for t in tests:
|
||||
tc = TestCase(
|
||||
name=t,
|
||||
variant=config,
|
||||
cmd=cmdPrefix+" "+t+" configOptions "+configOptions+"\n!",
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
|
||||
|
||||
|
||||
# softfloat tests
|
||||
if (softfloat):
|
||||
configs = []
|
||||
softfloatconfigs = [
|
||||
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
|
||||
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
|
||||
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
|
||||
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
|
||||
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
|
||||
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
|
||||
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
|
||||
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
|
||||
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
|
||||
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
|
||||
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
|
||||
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
|
||||
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
|
||||
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
|
||||
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
|
||||
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
|
||||
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
|
||||
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
|
||||
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
|
||||
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
|
||||
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
|
||||
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
|
||||
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
|
||||
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
|
||||
]
|
||||
for config in softfloatconfigs:
|
||||
# div test case
|
||||
divtest = TestCase(
|
||||
name="div",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,divtest)
|
||||
|
||||
# sqrt test case
|
||||
sqrttest = TestCase(
|
||||
name="sqrt",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
#configs.append(sqrttest)
|
||||
configs.insert(0,sqrttest)
|
||||
|
||||
|
||||
# skip if divider variant config
|
||||
if ("ieee" in config):
|
||||
# cvtint test case
|
||||
cvtinttest = TestCase(
|
||||
name="cvtint",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtint \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.append(cvtinttest)
|
||||
|
||||
# cvtfp test case
|
||||
# WILL fail on F_only (refer to spec)
|
||||
cvtfptest = TestCase(
|
||||
name="cvtfp",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " cvtfp \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.append(cvtfptest)
|
||||
|
||||
# intdiv verification
|
||||
if (intdiv):
|
||||
configs = []
|
||||
# ***NOTE add to this
|
||||
|
||||
intdivconfigs = [
|
||||
"fdh_ieee_div_2_1i_rv32gc", "fdh_ieee_div_2_1i_rv64gc", "fdh_ieee_div_2_2i_rv32gc",
|
||||
"fdh_ieee_div_2_2i_rv64gc", "fdh_ieee_div_2_4i_rv32gc", "fdh_ieee_div_2_4i_rv64gc",
|
||||
"fdh_ieee_div_4_1i_rv32gc", "fdh_ieee_div_4_1i_rv64gc", "fdh_ieee_div_4_2i_rv32gc",
|
||||
"fdh_ieee_div_4_2i_rv64gc", "fdh_ieee_div_4_4i_rv32gc", "fdh_ieee_div_4_4i_rv64gc",
|
||||
"fd_ieee_div_2_1i_rv32gc", "fd_ieee_div_2_1i_rv64gc", "fd_ieee_div_2_2i_rv32gc",
|
||||
"fd_ieee_div_2_2i_rv64gc", "fd_ieee_div_2_4i_rv32gc", "fd_ieee_div_2_4i_rv64gc",
|
||||
"fd_ieee_div_4_1i_rv32gc", "fd_ieee_div_4_1i_rv64gc", "fd_ieee_div_4_2i_rv32gc",
|
||||
"fd_ieee_div_4_2i_rv64gc", "fd_ieee_div_4_4i_rv32gc", "fd_ieee_div_4_4i_rv64gc",
|
||||
"fdqh_ieee_div_2_1i_rv32gc", "fdqh_ieee_div_2_1i_rv64gc", "fdqh_ieee_div_2_2i_rv32gc",
|
||||
"fdqh_ieee_div_2_2i_rv64gc", "fdqh_ieee_div_2_4i_rv32gc", "fdqh_ieee_div_2_4i_rv64gc",
|
||||
"fdqh_ieee_div_4_1i_rv32gc", "fdqh_ieee_div_4_1i_rv64gc", "fdqh_ieee_div_4_2i_rv32gc",
|
||||
"fdqh_ieee_div_4_2i_rv64gc", "fdqh_ieee_div_4_4i_rv32gc", "fdqh_ieee_div_4_4i_rv64gc",
|
||||
"fdq_ieee_div_2_1i_rv32gc", "fdq_ieee_div_2_1i_rv64gc", "fdq_ieee_div_2_2i_rv32gc",
|
||||
"fdq_ieee_div_2_2i_rv64gc", "fdq_ieee_div_2_4i_rv32gc", "fdq_ieee_div_2_4i_rv64gc",
|
||||
"fdq_ieee_div_4_1i_rv32gc", "fdq_ieee_div_4_1i_rv64gc", "fdq_ieee_div_4_2i_rv32gc",
|
||||
"fdq_ieee_div_4_2i_rv64gc", "fdq_ieee_div_4_4i_rv32gc", "fdq_ieee_div_4_4i_rv64gc",
|
||||
"fh_ieee_div_2_1i_rv32gc", "fh_ieee_div_2_1i_rv64gc", "fh_ieee_div_2_2i_rv32gc",
|
||||
"fh_ieee_div_2_2i_rv64gc", "fh_ieee_div_2_4i_rv32gc", "fh_ieee_div_2_4i_rv64gc",
|
||||
"fh_ieee_div_4_1i_rv32gc", "fh_ieee_div_4_1i_rv64gc", "fh_ieee_div_4_2i_rv32gc",
|
||||
"fh_ieee_div_4_2i_rv64gc", "fh_ieee_div_4_4i_rv32gc", "fh_ieee_div_4_4i_rv64gc",
|
||||
"f_ieee_div_2_1i_rv32gc", "f_ieee_div_2_1i_rv64gc", "f_ieee_div_2_2i_rv32gc",
|
||||
"f_ieee_div_2_2i_rv64gc", "f_ieee_div_2_4i_rv32gc", "f_ieee_div_2_4i_rv64gc",
|
||||
"f_ieee_div_4_1i_rv32gc", "f_ieee_div_4_1i_rv64gc", "f_ieee_div_4_2i_rv32gc",
|
||||
"f_ieee_div_4_2i_rv64gc", "f_ieee_div_4_4i_rv32gc", "f_ieee_div_4_4i_rv64gc",
|
||||
"fd_ieee_div_2_8i_rv32gc",
|
||||
"fd_ieee_div_2_8i_rv64gc",
|
||||
"fdq_ieee_div_2_8i_rv64gc",
|
||||
"fdq_ieee_div_2_8i_rv32gc",
|
||||
"f_ieee_div_2_8i_rv64gc",
|
||||
"f_ieee_div_2_8i_rv32gc"
|
||||
]
|
||||
nointdivconfigs = [
|
||||
"fdh_ieee_div_2_1_rv32gc", "fdh_ieee_div_2_1_rv64gc", "fdh_ieee_div_2_2_rv32gc",
|
||||
"fdh_ieee_div_2_2_rv64gc", "fdh_ieee_div_2_4_rv32gc", "fdh_ieee_div_2_4_rv64gc",
|
||||
"fdh_ieee_div_4_1_rv32gc", "fdh_ieee_div_4_1_rv64gc", "fdh_ieee_div_4_2_rv32gc",
|
||||
"fdh_ieee_div_4_2_rv64gc", "fdh_ieee_div_4_4_rv32gc", "fdh_ieee_div_4_4_rv64gc",
|
||||
"fd_ieee_div_2_1_rv32gc", "fd_ieee_div_2_1_rv64gc", "fd_ieee_div_2_2_rv32gc",
|
||||
"fd_ieee_div_2_2_rv64gc", "fd_ieee_div_2_4_rv32gc", "fd_ieee_div_2_4_rv64gc",
|
||||
"fd_ieee_div_4_1_rv32gc", "fd_ieee_div_4_1_rv64gc", "fd_ieee_div_4_2_rv32gc",
|
||||
"fd_ieee_div_4_2_rv64gc", "fd_ieee_div_4_4_rv32gc", "fd_ieee_div_4_4_rv64gc",
|
||||
"fdqh_ieee_div_2_1_rv32gc", "fdqh_ieee_div_2_1_rv64gc", "fdqh_ieee_div_2_2_rv32gc",
|
||||
"fdqh_ieee_div_2_2_rv64gc", "fdqh_ieee_div_2_4_rv32gc", "fdqh_ieee_div_2_4_rv64gc",
|
||||
"fdqh_ieee_div_4_1_rv32gc", "fdqh_ieee_div_4_1_rv64gc", "fdqh_ieee_div_4_2_rv32gc",
|
||||
"fdqh_ieee_div_4_2_rv64gc", "fdqh_ieee_div_4_4_rv32gc", "fdqh_ieee_div_4_4_rv64gc",
|
||||
"fdq_ieee_div_2_1_rv32gc", "fdq_ieee_div_2_1_rv64gc", "fdq_ieee_div_2_2_rv32gc",
|
||||
"fdq_ieee_div_2_2_rv64gc", "fdq_ieee_div_2_4_rv32gc", "fdq_ieee_div_2_4_rv64gc",
|
||||
"fdq_ieee_div_4_1_rv32gc", "fdq_ieee_div_4_1_rv64gc", "fdq_ieee_div_4_2_rv32gc",
|
||||
"fdq_ieee_div_4_2_rv64gc", "fdq_ieee_div_4_4_rv32gc", "fdq_ieee_div_4_4_rv64gc",
|
||||
"fh_ieee_div_2_1_rv32gc", "fh_ieee_div_2_1_rv64gc", "fh_ieee_div_2_2_rv32gc",
|
||||
"fh_ieee_div_2_2_rv64gc", "fh_ieee_div_2_4_rv32gc", "fh_ieee_div_2_4_rv64gc",
|
||||
"fh_ieee_div_4_1_rv32gc", "fh_ieee_div_4_1_rv64gc", "fh_ieee_div_4_2_rv32gc",
|
||||
"fh_ieee_div_4_2_rv64gc", "fh_ieee_div_4_4_rv32gc", "fh_ieee_div_4_4_rv64gc",
|
||||
"f_ieee_div_2_1_rv32gc", "f_ieee_div_2_1_rv64gc", "f_ieee_div_2_2_rv32gc",
|
||||
"f_ieee_div_2_2_rv64gc", "f_ieee_div_2_4_rv32gc", "f_ieee_div_2_4_rv64gc",
|
||||
"f_ieee_div_4_1_rv32gc", "f_ieee_div_4_1_rv64gc", "f_ieee_div_4_2_rv32gc",
|
||||
"f_ieee_div_4_2_rv64gc", "f_ieee_div_4_4_rv32gc", "f_ieee_div_4_4_rv64gc"
|
||||
]
|
||||
|
||||
for config in intdivconfigs:
|
||||
# fdivremsqrt test case
|
||||
fdivremsqrttestcase = TestCase(
|
||||
name="fdivremsqrt",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " fdivremsqrt \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,fdivremsqrttestcase)
|
||||
for config in nointdivconfigs:
|
||||
# div,sqrt test cases for no integer flavor of divider
|
||||
divtestcase = TestCase(
|
||||
name="fdiv",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " div_drsu \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,divtestcase)
|
||||
sqrttestcase = TestCase(
|
||||
name="fsqrt",
|
||||
variant=config,
|
||||
cmd="vsim > {} -c <<!\ndo testfloat-batch.do " + config + " sqrt_drsu \n!",
|
||||
grepstr="All Tests completed with 0 errors"
|
||||
)
|
||||
configs.insert(0,sqrttestcase)
|
||||
|
||||
import os
|
||||
from multiprocessing import Pool, TimeoutError
|
||||
|
||||
def search_log_for_text(text, logfile):
    """Search through the given log file for text, returning True if it is found or False if it is not.

    text:    a grep pattern (passed to `grep -e`, so any pattern grep accepts).
    logfile: path of the file to search.

    Returns False as well when grep cannot read the file or cannot be started.
    """
    # Local import keeps this function self-contained; the module does not
    # import subprocess at file level.
    import subprocess

    # An argument list bypasses the shell entirely, so quotes or other shell
    # metacharacters in the pattern or path can neither break the command nor
    # be interpreted as shell syntax.
    try:
        completed = subprocess.run(["grep", "-e", text, logfile],
                                   stdout=subprocess.DEVNULL)
    except OSError as err:
        # grep itself could not be launched; report it and treat as "not found"
        print("Could not run grep on %s: %s" % (logfile, err), file=sys.stderr)
        return False
    return completed.returncode == 0
|
||||
|
||||
def run_test_case(config):
    """Run the given test case, and return 0 if the test succeeds and 1 if it fails.

    config: a TestCase.  Its cmd is formatted with the logfile path
            logs/<variant>_<name>.log and run through the shell; the test
            succeeds iff config.grepstr is then found in that logfile.
    """
    logname = f"logs/{config.variant}_{config.name}.log"
    cmd = config.cmd.format(logname)
    # The log path is relative, so make sure we are in the regression directory
    os.chdir(regressionDir)
    os.system(cmd)
    label = f"{config.variant}_{config.name}"
    if search_log_for_text(config.grepstr, logname):
        print(f"{bcolors.OKGREEN}{label}: Success{bcolors.ENDC}")
        return 0
    print(f"{bcolors.FAIL}{label}: Failures detected in output{bcolors.ENDC}")
    print(f" Check {logname}")
    return 1
|
||||
|
||||
def main():
    """Run the tests and count the failures.

    Command-line flags in sys.argv select the run mode: they set the
    per-test timeout and decide whether a buildroot test case is added to
    (or replaces) the module-level `configs` list.  Every test case is then
    run in a process pool.

    Returns the number of test cases that failed or timed out (0 on success).
    """
    global configs  # rebound below when -buildroot replaces the whole list

    # Make sure the log directory exists and start from an empty work directory.
    os.chdir(regressionDir)
    os.makedirs("logs", exist_ok=True)
    shutil.rmtree("wkdir", ignore_errors=True)
    os.makedirs("wkdir", exist_ok=True)

    if '-makeTests' in sys.argv:
        os.chdir(regressionDir)
        os.system('./make-tests.sh | tee ./logs/make-tests.log')

    if '-all' in sys.argv:
        TIMEOUT_DUR = 30*7200 # seconds
        configs.append(getBuildrootTC(boot=True))
    elif '-buildroot' in sys.argv:
        TIMEOUT_DUR = 30*7200 # seconds
        configs = [getBuildrootTC(boot=True)]
    elif '-coverage' in sys.argv:
        TIMEOUT_DUR = 20*60 # seconds
        # Presently don't run buildroot because it has a different config and can't be merged with the rv64gc coverage.
        # Also it is slow to run.
        # configs.append(getBuildrootTC(boot=False))
        os.system('rm -f cov/*.ucdb')
    elif '-nightly' in sys.argv:
        TIMEOUT_DUR = 60*1440 # 1 day
        configs.append(getBuildrootTC(boot=False))
    elif '-softfloat' in sys.argv:
        TIMEOUT_DUR = 60*60 # seconds
    elif '-intdiv' in sys.argv:
        TIMEOUT_DUR = 60*60 # seconds
    else:
        TIMEOUT_DUR = 10*60 # seconds
        configs.append(getBuildrootTC(boot=False))

    # Scale the number of concurrent processes to the number of test cases, but
    # max out at a limited number of concurrent processes to not overwhelm the system.
    # Pool rejects a process count below 1, so an empty config list still gets one worker.
    workers = max(1, min(len(configs), multiprocessing.cpu_count()))
    with Pool(processes=workers) as pool:
        # Keep (config, result) pairs in a list rather than a dict keyed by the
        # TestCase: two equal test cases would share one dict slot and one of
        # their results would never be collected.
        pending = [(config, pool.apply_async(run_test_case, (config,)))
                   for config in configs]
        num_fail = 0
        for config, result in pending:
            try:
                num_fail += result.get(timeout=TIMEOUT_DUR)
            except TimeoutError:
                num_fail += 1
                print(f"{bcolors.FAIL}{config.variant}_{config.name}: Timeout - runtime exceeded {TIMEOUT_DUR} seconds{bcolors.ENDC}")

    # Coverage report
    if coverage:
        os.system('make coverage')
    # Count the number of failures
    if num_fail:
        print(f"{bcolors.FAIL}Regression failed with {num_fail} failed configurations{bcolors.ENDC}")
    else:
        print(f"{bcolors.OKGREEN}SUCCESS! All tests ran without failures{bcolors.ENDC}")
    return num_fail
|
||||
|
||||
if __name__ == '__main__':
    # Exit statuses are truncated to 8 bits by the OS, so a failure count that
    # is a multiple of 256 would be reported as success; clamp it to 255.
    # sys.exit is used because the bare exit() helper is only installed by the
    # site module.
    sys.exit(min(main(), 255))
|
@ -123,6 +123,10 @@ localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (FMALEN
|
||||
|
||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
|
||||
|
||||
localparam CORRSHIFTSZ = `max((NORMSHIFTSZ-2), (DIVMINb + 1 + NF));
|
||||
localparam NORMSHIFTSZDRSU = DIVb+1+NF;
|
||||
localparam LOGNORMSHIFTSZDRSU = $clog2(NORMSHIFTSZDRSU);
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
/* verilator lint_off STMTDLY */
|
||||
|
@ -194,6 +194,8 @@ localparam cvw_t P = '{
|
||||
FMALEN : FMALEN,
|
||||
NORMSHIFTSZ : NORMSHIFTSZ,
|
||||
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
||||
NORMSHIFTSZDRSU : NORMSHIFTSZDRSU,
|
||||
LOGNORMSHIFTSZDRSU : LOGNORMSHIFTSZDRSU,
|
||||
LOGR : LOGR,
|
||||
RK : RK,
|
||||
FPDUR : FPDUR,
|
||||
|
@ -285,6 +285,8 @@ typedef struct packed {
|
||||
int LOGCVTLEN;
|
||||
int NORMSHIFTSZ;
|
||||
int LOGNORMSHIFTSZ;
|
||||
int NORMSHIFTSZDRSU;
|
||||
int LOGNORMSHIFTSZDRSU;
|
||||
int FMALEN;
|
||||
|
||||
// division constants
|
||||
|
9
src/fpu/divremsqrt/arithrightshift.sv
Normal file
9
src/fpu/divremsqrt/arithrightshift.sv
Normal file
@ -0,0 +1,9 @@
|
||||
|
||||
// Arithmetic right shift of shiftin by P.LOGR bit positions.
module arithrightshift import cvw::*; #(parameter cvw_t P) (
  input  logic signed [P.INTDIVb+3:0] shiftin,   // signed value to be shifted
  output logic signed [P.INTDIVb+3:0] shifted    // shiftin >>> P.LOGR, same width as the input
);

  // shiftin is declared signed, so >>> already fills the vacated upper bits
  // with copies of its sign bit; no explicit cast is required.
  assign shifted = shiftin >>> P.LOGR;

endmodule
|
||||
|
111
src/fpu/divremsqrt/divremsqrt.sv
Normal file
111
src/fpu/divremsqrt/divremsqrt.sv
Normal file
@ -0,0 +1,111 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrt.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// divremsqrt
//   Structural top level of the combined divide / remainder / square-root unit.
//   It contains no logic of its own: it instantiates the preprocessor, the FSM,
//   the CSA iterator and the postprocessor and wires them together.
//   Computes X/Y, sqrt(X), A/B, or A%B.
module divremsqrt import cvw::*; #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 reset,
  input  logic [P.FMTBITS-1:0] FmtE,                            // format select, forwarded to preprocessor
  input  logic                 XsE,                             // forwarded to the FSM
  input  logic [P.NF:0]        XmE, YmE,                        // floating-point significands
  input  logic [P.NE-1:0]      XeE, YeE,                        // floating-point exponents
  input  logic                 XInfE, YInfE,                    // special-value flags, forwarded to the FSM
  input  logic                 XZeroE, YZeroE,
  input  logic                 XNaNE, YNaNE,
  input  logic                 FDivStartE, IDivStartE,          // start requests, forwarded to the FSM
  input  logic                 StallM,
  input  logic                 FlushE,
  input  logic                 SqrtE, SqrtM,
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE,  // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic [2:0]           Funct3E, Funct3M,                // Funct3M[1] selects the remainder in the postprocessor
  input  logic                 IntDivE, W64E,
  output logic                 DivStickyM,
  output logic                 FDivBusyE, IFDivStartE, FDivDoneE,
  output logic [P.NE+1:0]      UeM,                             // result exponent (registered in the preprocessor)
  output logic [P.DIVb:0]      UmM,                             // result significand from the postprocessor
  output logic [P.XLEN-1:0]    FIntDivResultM,                  // integer result from the postprocessor
  output logic                 IntDivM,
  // integer normalization shifter signals
  output logic [P.INTDIVb+3:0] PreResultM,
  input  logic [P.XLEN-1:0]    PreIntResultM,
  output logic [P.DIVBLEN-1:0] IntNormShiftM

);

  // Floating-point division and square root module, with optional integer division and remainder
  // Computes X/Y, sqrt(X), A/B, or A%B

  logic [P.DIVb+3:0]           WS, WC;                          // Partial remainder components
  logic [P.DIVb+3:0]           X;                               // Iterator Initial Value (from dividend)
  logic [P.DIVb+3:0]           D;                               // Iterator Divisor
  logic [P.DIVb:0]             FirstU, FirstUM;                 // Intermediate result values
  logic [P.DIVb+1:0]           FirstC;                          // Step tracker
  logic                        Firstun;                         // Quotient selection
  logic                        WZeroE;                          // Early termination flag
  logic [P.DURLEN:0]           CyclesE;                         // FSM cycles
  logic                        SpecialCaseM;                    // Divide by zero, square root of negative, etc.

  // Integer div/rem signals
  logic                        BZeroM;                          // Denominator is zero
  logic                        ALTBM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM; // Special handling for postprocessor
  logic [P.XLEN-1:0]           AM;                              // Original Numerator for postprocessor
  logic                        ISpecialCaseE;                   // Integer div/remainder special cases

  // Preprocessor: produces the iterator operands X and D, the cycle count and
  // the registered integer side-band signals consumed by the postprocessor.
  divremsqrtfdivsqrtpreproc #(P) divremsqrtfdivsqrtpreproc(
    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
    .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
    // Int-specific
    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
    .BZeroM, .AM,
    .IntDivM, .W64M, .ALTBM, .AsM, .BsM, .IntNormShiftM, .SIGNOVERFLOWM, .ZeroDiffM);

  // FSM: generates busy / start / done and the special-case indication.
  fdivsqrtfsm #(P) fdivsqrtfsm(
    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
    // Int-specific
    .IDivStartE, .ISpecialCaseE, .IntDivE);

  // CSA iterator: produces the partial remainder (WS, WC) and the running result.
  fdivsqrtiter #(P) fdivsqrtiter(
    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));

  // Postprocessor: early-termination detect, sticky bit, result selection and
  // integer quotient/remainder handling.
  divremsqrtfdivsqrtpostproc #(P) fdivsqrtpostproc(
    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
    .UmM, .WZeroE, .DivStickyM,
    // Int-specific
    .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
    .FIntDivResultM, .PreResultM, .PreIntResultM, .SIGNOVERFLOWM, .ZeroDiffM, .IntDivM, .IntNormShiftM);

endmodule
|
||||
|
73
src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
Normal file
73
src/fpu/divremsqrt/divremsqrtdivshiftcalc.sv
Normal file
@ -0,0 +1,73 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtdivshiftcalc.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Division shift calculation
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// divremsqrtdivshiftcalc
//   Chooses the left-shift amount and builds the pre-shifted input for the
//   normalization shifter that follows a divide / square-root operation.
//   A subnormal result uses an exponent-dependent shift (clamped to zero when
//   negative); a normal result always uses a shift of P.NF.
module divremsqrtdivshiftcalc import cvw::*; #(parameter cvw_t P) (
  input  logic [P.NF+2:0]                 DivUm,              // divsqrt significand
  input  logic [P.NE+1:0]                 DivUe,              // divsqrt exponent
  output logic [P.LOGNORMSHIFTSZDRSU-1:0] DivShiftAmt,        // divsqrt shift amount
  output logic [P.NORMSHIFTSZDRSU-1:0]    DivShiftIn,         // divsqrt shift input
  output logic                            DivResSubnorm,      // is the divsqrt result subnormal
  output logic                            DivSubnormShiftPos  // is the subnormal shift amount positive
);

  // number of zero bits appended below DivUm so that DivShiftIn fills the shifter width
  localparam int LOWPAD = P.NORMSHIFTSZDRSU-(P.NF+2)-1-P.NF;

  logic [P.NE+1:0]                 SubnormShiftFull;  // full-width signed subnormal shift amount
  logic [P.LOGNORMSHIFTSZDRSU-1:0] SubnormShiftAmt;   // subnormal shift amount, zeroed when negative
  logic [P.LOGNORMSHIFTSZDRSU-1:0] NormShiftAmt;      // shift amount for a normal result

  // Normal result:
  //  00000000x.xxxxxx...                     Exp = DivUe
  //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
  //  00000000.xxxxxxx... << NF               Exp = DivUe+1
  //  00000000x.xxxxxx... << NF               Exp = DivUe (extra shift done afterwards)
  //  00000000xx.xxxxx... << 1?               Exp = DivUe-1 (determined after)
  // The initial left shift is therefore the constant NF.
  assign NormShiftAmt = (P.LOGNORMSHIFTSZDRSU)'(P.NF);

  always_comb begin
    // The result is subnormal when the exponent is negative (MSB set) or exactly zero.
    DivResSubnorm = DivUe[P.NE+1] | ~(|DivUe);

    // Subnormal result:
    //  00000000x.xxxxxx...                     Exp = DivUe
    //  .00000000xxxxxxx... >> NF+1             Exp = DivUe+NF+1
    //  .00xxxxxxxxxxxxx... << DivUe+NF+1       Exp = +1
    //  .0000xxxxxxxxxxx... >> 1                Exp = 1
    // Left shift amount = DivUe+NF+1-1 = DivUe+NF
    SubnormShiftFull   = DivUe + (P.NE+2)'(P.NF);
    DivSubnormShiftPos = ~SubnormShiftFull[P.NE+1];

    // A negative shift amount means "do not shift" (the sticky bit is kept).
    SubnormShiftAmt = DivSubnormShiftPos ? SubnormShiftFull[P.LOGNORMSHIFTSZDRSU-1:0] : '0;

    // Final selection between the subnormal and the normal shift amount.
    DivShiftAmt = DivResSubnorm ? SubnormShiftAmt : NormShiftAmt;
  end

  // Pre-shift the divider result: NF zeros above DivUm, zero padding below.
  assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {LOWPAD{1'b0}}};

endmodule
|
27
src/fpu/divremsqrt/divremsqrtearlyterm.sv
Normal file
27
src/fpu/divremsqrt/divremsqrtearlyterm.sv
Normal file
@ -0,0 +1,27 @@
|
||||
// divremsqrtearlyterm
//   Early-termination detector: asserts WZeroE when the carry-save partial
//   remainder WS+WC is zero. For radix 2 it additionally asserts WZeroE when
//   WS+WC+F is zero, where F is 2*D for division or is built from FirstUM and
//   FirstC for square root.
//   Firstun is an input of this module but is not referenced in its body.
module divremsqrtearlyterm import cvw::*; #(parameter cvw_t P) (
  input  logic [P.DIVb+3:0] WS, WC,          // Q4.DIVb
  input  logic [P.DIVb+3:0] D,               // Q4.DIVb
  input  logic [P.DIVb:0]   FirstUM,         // U1.DIVb
  input  logic [P.DIVb+1:0] FirstC,          // Q2.DIVb
  input  logic              Firstun, SqrtE,
  output logic              WZeroE
);

  logic RemZeroE;                            // WS + WC == 0

  aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, RemZeroE);

  if (P.RADIX == 2) begin: R2EarlyTerm
    logic [P.DIVb+2:0] CExt;                 // FirstC with a leading 1 prepended
    logic [P.DIVb+2:0] FirstK;               // isolates one bit of CExt: set where CExt is 1 and the bit below it is 0
    logic [P.DIVb+3:0] FDivE, FSqrtE, FSelE; // F for divide, F for square root, selected F
    logic [P.DIVb+3:0] WSF, WCF;             // carry-save form of WS + WC + F
    logic              RemPlusFZeroE;        // WS + WC + F == 0

    assign CExt   = {1'b1, FirstC};
    assign FirstK = CExt & ~(CExt << 1);

    assign FSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK, 1'b0}; // F for square root
    assign FDivE  = {D[P.DIVb+2:0], 1'b0};                             // F for divide: D << 1

    mux2 #(P.DIVb+4) fzeromux(FDivE, FSqrtE, SqrtE, FSelE);
    csa #(P.DIVb+4) fadd(WS, WC, FSelE, 1'b0, WSF, WCF);               // {WCF, WSF} = WS + WC + F
    aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, RemPlusFZeroE);

    assign WZeroE = RemZeroE | RemPlusFZeroE;
  end else begin
    // other radices only check the plain remainder
    assign WZeroE = RemZeroE;
  end

endmodule
|
116
src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
Normal file
116
src/fpu/divremsqrt/divremsqrtfdivsqrtpostproc.sv
Normal file
@ -0,0 +1,116 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtfdivsqrtpostproc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Divide/Square root postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// divremsqrtfdivsqrtpostproc
//   Postprocessing for the divide / remainder / square-root unit:
//     - Execute stage: early-termination detection (WZeroE)
//     - Memory stage: sticky bit, selection of U or U-1, square-root result shift
//     - With P.IDIV_ON_FPU: integer quotient/remainder correction, normalization
//       shift, sign correction, special cases and W64 sign extension
//   PreResultM (the unshifted, unsigned integer quotient or remainder) is driven
//   onto the module output of that name.
module divremsqrtfdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
  input  logic                 clk, reset,
  input  logic                 StallM,
  input  logic [P.DIVb+3:0]    WS, WC,                 // Q4.DIVb
  input  logic [P.DIVb+3:0]    D,                      // Q4.DIVb
  input  logic [P.DIVb:0]      FirstU, FirstUM,        // U1.DIVb
  input  logic [P.DIVb+1:0]    FirstC,                 // Q2.DIVb
  input  logic                 SqrtE,
  input  logic                 Firstun, SqrtM, SpecialCaseM,
  input  logic [P.XLEN-1:0]    AM,                     // U/Q(XLEN.0)
  input  logic                 RemOpM, ALTBM, BZeroM, AsM, BsM, W64M, SIGNOVERFLOWM, ZeroDiffM, IntDivM,
  input  logic [P.DIVBLEN-1:0] IntNormShiftM,
  input  logic [P.XLEN-1:0]    PreIntResultM,          // NOTE(review): hidden by the local of the same name in intpostproc; not read when IDIV_ON_FPU is set
  output logic [P.DIVb:0]      UmM,                    // U1.DIVb result significand
  output logic                 WZeroE,
  output logic                 DivStickyM,
  output logic [P.XLEN-1:0]    FIntDivResultM,         // U/Q(XLEN.0)
  output logic [P.INTDIVb+3:0] PreResultM              // integer quotient or remainder before the normalization shift

);

  logic [P.DIVb+3:0]           Sum;                    // WC + WS
  logic [P.INTDIVb+3:0]        W;                      // truncated Sum after arithmetic right shift
  logic [P.DIVb:0]             PreUmM;                 // U or U-1
  logic                        NegStickyM;             // sign of Sum
  logic                        WZeroM;                 // WZeroE registered into the Memory stage
  logic [P.XLEN-1:0]           IntDivResultM;
  logic                        NegQuotM;               // Integer quotient is negative

  //////////////////////////
  // Execute Stage: Detect early termination for an exact result
  //////////////////////////

  // check for early termination on an exact result.
  divremsqrtearlyterm #(P) earlyterm(.FirstC, .FirstUM, .D, .SqrtE, .WC, .WS, .Firstun, .WZeroE);

  //////////////////////////
  // E/M Pipeline register
  //////////////////////////

  flopenr #(1) WZeroMReg(clk, reset, ~StallM, WZeroE, WZeroM);

  //////////////////////////
  // Memory Stage: Postprocessing
  //////////////////////////

  // If the result is not exact, the sticky should be set
  assign DivStickyM = ~WZeroM & ~SpecialCaseM;

  // Determine if sticky bit is negative  *** Full sum only needed for Integer
  assign Sum = WC + WS;
  assign NegStickyM = Sum[P.DIVb+3];
  mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
  mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);

  // Integer quotient or remainder correction, normalization, and special cases
  if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
    logic [P.INTDIVb+3:0]        UnsignedQuotM, NormRemDM;
    // PreResultM is intentionally NOT redeclared here: a local declaration would
    // hide the module output and leave that port undriven.
    // NOTE(review): the local PreIntResultM below hides the module input of the
    // same name, so the value consumed by intspecialcase comes from the internal
    // shifter. Confirm which source is intended once the port is finished.
    logic signed [P.INTDIVb+3:0] PreResultShiftedM, PreIntResultM;
    logic [P.INTDIVb+3:0]        DTrunc, SumTrunc;

    // keep only the upper INTDIVb+4 bits of the remainder and the divisor
    assign SumTrunc = Sum[P.DIVb+3:P.DIVb-P.INTDIVb];
    assign DTrunc   = D[P.DIVb+3:P.DIVb-P.INTDIVb];
    arithrightshift #(P) rshift(SumTrunc, W);

    assign UnsignedQuotM = {3'b000, PreUmM[P.DIVb:P.DIVb-P.INTDIVb]};

    // Integer remainder: sticky and sign correction muxes
    assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
    mux2 #(P.INTDIVb+4) normremdmux(W, W+DTrunc, NegStickyM, NormRemDM);

    // Select quotient or remainder and do normalization shift
    mux2 #(P.INTDIVb+4) presresultmux(UnsignedQuotM, NormRemDM, RemOpM, PreResultM);
    intrightshift #(P) intnormshifter(PreResultM, IntNormShiftM, PreResultShiftedM);
    mux2 #(P.INTDIVb+4) preintresultmux(PreResultShiftedM, -PreResultShiftedM, AsM ^ (BsM&~RemOpM), PreIntResultM);

    divremsqrtintspecialcase #(P) intspecialcase(BZeroM, RemOpM, ALTBM, AM, PreIntResultM, IntDivResultM);

    // sign extend result for W64
    if (P.XLEN==64) begin
      mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
        {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
        W64M, FIntDivResultM);
    end else
      assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
  end
endmodule
|
250
src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
Normal file
250
src/fpu/divremsqrt/divremsqrtfdivsqrtpreproc.sv
Normal file
@ -0,0 +1,250 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtfdivsqrtpreproc.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// divremsqrtfdivsqrtpreproc
//   Preprocessing for the divide / remainder / square-root unit:
//     - integer operand extraction (W64 handling, absolute value, zero/sign detect)
//     - selection between floating-point and integer operands
//     - leading-zero count and normalization shift of both operands
//     - integer right shift of the dividend to a digit boundary
//     - square-root operand preparation
//     - result exponent, cycle count and the E->M pipeline registers for the
//       integer side-band signals
//   NOTE(review): the outputs SIGNOVERFLOWM and ZeroDiffM are not driven anywhere
//   in this module (SIGNOVERFLOWE is tied to 0 but never registered).
module divremsqrtfdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 IFDivStartE,
  input  logic [P.NF:0]        Xm, Ym,                          // Floating-point significands
  input  logic [P.NE-1:0]      Xe, Ye,                          // Floating-point exponents
  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                 SqrtE,
  input  logic                 XZeroE,
  input  logic [2:0]           Funct3E,
  output logic [P.NE+1:0]      UeM,                             // biased exponent of result
  output logic [P.DIVb+3:0]    X, D,                            // Q4.DIVb
  // Int-specific
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE,  // U(XLEN.0) inputs from IEU
  input  logic                 IntDivE, W64E,
  // Outputs
  output logic                 ISpecialCaseE,
  output logic [P.DURLEN:0]    CyclesE,
  output logic [P.DIVBLEN-1:0] IntNormShiftM,
  output logic                 ALTBM, IntDivM, W64M, SIGNOVERFLOWM, ZeroDiffM,
  output logic                 AsM, BsM, BZeroM,
  output logic [P.XLEN-1:0]    AM
);

  logic [P.DIVb:0]             Xnorm, Dnorm;
  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
  logic [P.NE+1:0]             UeE;                             // Result Exponent (FP only)
  logic [P.DIVb:0]             IFX, IFD;                        // Correctly-sized inputs for iterator, selected from int or fp input
  logic [P.DIVBLEN-1:0]        mE, ell;                         // Leading zeros of inputs
  logic [P.DIVBLEN-1:0]        IntResultBitsE;                  // bits in integer result
  logic                        NumerZeroE;                      // Numerator is zero (X or A)
  logic                        SIGNOVERFLOWE;
  logic                        AZeroE, BZeroE;                  // A or B is Zero for integer division
  logic                        SignedDivE;                      // signed division
  logic                        AsE, BsE;                        // Signs of integer inputs
  logic [P.XLEN-1:0]           AE;                              // input A after W64 adjustment
  logic                        ALTBE;
  logic                        EvenExp;

  logic [$clog2(P.RK):0]       RightShiftX;
  logic [P.DIVBLEN-1:0]        ZeroDiff, p;

  //////////////////////////////////////////////////////
  // Integer Preprocessing
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
    logic [P.XLEN-1:0] BE, PosA, PosB;

    // Extract inputs, signs, zero, depending on W64 mode if applicable
    assign SignedDivE = ~Funct3E[0];

    // Source handling
    if (P.XLEN==64) begin // 64-bit, supports W64
      mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
      mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
    end else begin // 32 bits only
      assign AE = ForwardedSrcAE;
      assign BE = ForwardedSrcBE;
    end
    assign AZeroE = ~(|AE);
    assign BZeroE = ~(|BE);
    assign AsE = AE[P.XLEN-1] & SignedDivE;
    assign BsE = BE[P.XLEN-1] & SignedDivE;

    // Force integer inputs to be positive
    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);

    // Select integer or floating point inputs
    mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
    mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
    mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
  end else begin // Int not supported
    assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
    assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
    assign NumerZeroE = XZeroE;
  end

  //////////////////////////////////////////////////////
  // Integer & FP leading zero and normalization shift
  //////////////////////////////////////////////////////

  // count leading zeros for Subnorm FP and to normalize integer inputs
  divremsqrtlzc #(P.DIVb+1) lzcX (IFX, ell);
  divremsqrtlzc #(P.DIVb+1) lzcY (IFD, mE);

  // Normalization shift: shift leading one into most significant bit
  assign Xnorm = (IFX << ell);
  assign Dnorm = (IFD << mE);

  //////////////////////////////////////////////////////
  // Integer Right Shift to digit boundary
  // Determine DivXShifted (X shifted to digit boundary)
  // and nE (number of fractional digits)
  //////////////////////////////////////////////////////

  assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division

  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported

    // calculate number of result bits
    assign ZeroDiff = mE - ell;               // Difference in number of leading zeros
    assign ALTBE = ZeroDiff[P.DIVBLEN-1];     // A less than B (A has more leading zeros)
    assign SIGNOVERFLOWE = 1'b0;

    mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);

    /* verilator lint_off WIDTH */
    assign IntResultBitsE = P.LOGR + p;       // Total number of result bits (r integer bits plus p fractional bits)

    /* verilator lint_on WIDTH */

    // Integer special cases (terminate immediately)
    assign ISpecialCaseE = BZeroE | ALTBE;

    // calculate right shift amount RightShiftX to complete in discrete number of steps
    if (P.RK > 1) begin // more than 1 bit per cycle

      // The metacomment must be spelled exactly "lint_off": a misspelling leaves the
      // WIDTH warning enabled and the lint_on below unbalanced.
      /* verilator lint_off WIDTH */
      assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
      assign DivXShifted = DivX >> RightShiftX;                      // shift X by up to R*K-1 to complete in n steps
      /* verilator lint_on WIDTH */
    end else begin // radix 2 1 copy doesn't require shifting
      assign DivXShifted = DivX;
      assign RightShiftX = 0;
    end
  end else begin
    assign ISpecialCaseE = 0;
  end

  //////////////////////////////////////////////////////
  // Floating-Point Preprocessing
  // Extend to Q4.b format
  // shift square root to be in range [1/4, 1)
  // Normalized numbers are shifted right by 1 if the exponent is odd
  // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
  //////////////////////////////////////////////////////

  // Sqrt is initialized on step one as R(X-1), so depends on Radix
  // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
  // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
  // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
  // Now (X-1) is negative.  Formed by placing all 1s in all four integer bits (in Q4.b) form, keeping X in fraction bits
  // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
  // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
  // Subtracting 2 is equivalent to adding 1110.  Subtracting 4 is equivalent to adding 1100.  Prepend leading 1s to do a free subtraction.
  // This also means only one extra fractional bit is needed because we never shift right by more than 1.
  // Radix   Exponent odd          Exponent Even
  // 2       x-2 = 2(x/2 - 1)      x/2 - 2 = 2(x/4 - 1)
  // 4       2(x)-4 = 4(x/2 - 1))  2(x/2)-4 = 4(x/4 - 1)
  // Summary: PreSqrtX = r(x/2or4 - 1)

  logic [P.DIVb:0] PreSqrtX;
  assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
  mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even

  /*
  // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
  // This saves one bit in DIVb because there is no initial right shift.
  // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
  // That is an optimization for another day.
  if (P.RADIX == 2) begin
    logic [P.DIVb:0] PreSqrtX; // U1.DIVb
    mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
    assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
  end else begin
    logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
    mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
    assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
  end
  */

  // Initialize X for division or square root
  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);

  //////////////////////////////////////////////////////
  // Select integer or floating-point operands
  //////////////////////////////////////////////////////
  if (P.IDIV_ON_FPU) begin
    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
  end else begin
    assign X = PreShiftX;
  end

  // Divisor register
  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);

  // Floating-point exponent
  fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
  flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);

  // Number of FSM cycles (to FSM)
  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);

  if (P.IDIV_ON_FPU) begin:intpipelineregs
    logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
    logic                 RemOpE;

    /* verilator lint_off WIDTH */
    assign IntDivNormShiftE = P.INTDIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift.  rn = Cycles * r * k - r ***explain
    assign IntRemNormShiftE = mE + (P.INTDIVb-(P.XLEN-1));           // m + b - (N-1) for remainder normalization shift
    /* verilator lint_on WIDTH */
    assign RemOpE = Funct3E[1];
    mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);

    // pipeline registers
    flopen #(1)         mdureg(clk, IFDivStartE, IntDivE, IntDivM);
    flopen #(1)         altbreg(clk, IFDivStartE, ALTBE, ALTBM);
    flopen #(1)         bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
    flopen #(1)         asignreg(clk, IFDivStartE, AsE, AsM);
    flopen #(1)         bsignreg(clk, IFDivStartE, BsE, BsM);
    flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
    flopen #(P.XLEN)    srcareg(clk, IFDivStartE, AE, AM);
    if (P.XLEN==64)
      flopen #(1)       w64reg(clk, IFDivStartE, W64E, W64M);
  end

endmodule
|
||||
|
183
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
183
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
@ -0,0 +1,183 @@
|
||||
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtflags.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Post-Processing flag calculation
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtflags import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XInf, YInf, // inputs are infinity
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic XSNaN, YSNaN, // inputs are signaling NaNs
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [P.NE+1:0] Me, // exponent of the normalized sum
|
||||
// rounding
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Round, Guard, Sticky, // bits used to determine rounding
|
||||
input logic UfPlus1, // do you add one for rounding for the unbounded exponent result
|
||||
// divsqrt
|
||||
input logic DivOp, // conversion opperation?
|
||||
input logic Sqrt, // Sqrt?
|
||||
// flags
|
||||
output logic DivByZero, // divide by zero flag
|
||||
output logic Overflow, // overflow flag to select result
|
||||
output logic Invalid, // invalid flag to select the result
|
||||
output logic [4:0] PostProcFlg // flags
|
||||
);
|
||||
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic Inexact; // final inexact flag
|
||||
logic FpInexact; // floating point inexact flag
|
||||
logic DivInvalid; // integer invalid flag
|
||||
logic Underflow; // Underflow flag
|
||||
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Overflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if the result exponent is greater than or equal to the maximum exponent or
|
||||
// the shift amount is greater than the integers size (for cvt to int)
|
||||
// ShiftGtIntSz calculation:
|
||||
// a left shift of intlen+1 is still in range but any more than that is an overflow
|
||||
// inital: | 64 0's | XLEN |
|
||||
// | 64 0's | XLEN | << 64
|
||||
// | XLEN | 00000... |
|
||||
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// larger or equal if:
|
||||
// - any of the bits after the most significan 1 is one
|
||||
// - the most signifcant in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
|
||||
P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
|
||||
P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
|
||||
P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// calulate overflow flag:
|
||||
// if the result is greater than or equal to the max exponent(not taking into account sign)
|
||||
// | and the exponent isn't negitive
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Underflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate underflow flag: detecting tininess after rounding
|
||||
// the exponent is negitive
|
||||
// | the result is subnormal
|
||||
// | | the result is normal and rounded from a Subnorm
|
||||
// | | | and if given an unbounded exponent the result does not round
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
//assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky&~XZero)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|(Sticky)|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Inexact
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't outputed
|
||||
//assign FpInexact = ((Sticky&~XZero)|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = FpInexact;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Invalid
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
|
||||
assign SigNaN = (XSNaN) | (YSNaN) ;
|
||||
|
||||
//invalid flag for division
|
||||
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
|
||||
|
||||
assign Invalid = SigNaN | (DivInvalid&DivOp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Divide by Zero
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// if dividing by zero and not 0/0
|
||||
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
|
||||
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// final flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
15
src/fpu/divremsqrt/divremsqrtintspecialcase.sv
Normal file
15
src/fpu/divremsqrt/divremsqrtintspecialcase.sv
Normal file
@ -0,0 +1,15 @@
|
||||
// Integer divide/remainder special-case selection.
//   BZeroM (divide by zero):        remainder op -> AM, otherwise all ones
//   ALTBM ("numerator is small"):   remainder op -> AM, otherwise zero
//   neither:                        low XLEN bits of PreIntResultM
// BZeroM takes priority over ALTBM.
module divremsqrtintspecialcase import cvw::*;  #(parameter cvw_t P) (
  input  logic                   BZeroM, RemOpM, ALTBM,  // special-case selects and remainder-op flag
  input  logic [P.XLEN-1:0]      AM,                     // value returned for remainder ops in both special cases
  input  signed [P.INTDIVb+3:0]  PreIntResultM,          // result used when no special case applies
  output logic [P.XLEN-1:0]      IntDivResultM           // selected integer result
);

  always_comb begin
    // ordinary case first; the special cases below override it
    IntDivResultM = PreIntResultM[P.XLEN-1:0];

    if (BZeroM) begin                 // Divide by zero
      if (RemOpM) IntDivResultM = AM;
      else        IntDivResultM = '1;
    end else if (ALTBM) begin         // Numerator is small
      if (RemOpM) IntDivResultM = AM;
      else        IntDivResultM = '0;
    end
  end

endmodule
|
39
src/fpu/divremsqrt/divremsqrtlzc.sv
Normal file
39
src/fpu/divremsqrt/divremsqrtlzc.sv
Normal file
@ -0,0 +1,39 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Leading Zero Counter
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Leading zero counter: ZeroCnt is the number of zero bits above the
// most significant 1 in num.
//
// ZeroCnt is only $clog2(WIDTH) bits wide, so when num is all zeros the count
// of WIDTH is truncated to its low $clog2(WIDTH) bits (0 when WIDTH is a power
// of two).
// NOTE(review): with the default WIDTH = 1, $clog2(WIDTH)-1 is -1 and the
// ZeroCnt range becomes [-1:0]; instantiate with WIDTH >= 2.
module divremsqrtlzc #(parameter WIDTH = 1) (
  input  logic [WIDTH-1:0]         num,     // number to count the leading zeroes of
  output logic [$clog2(WIDTH)-1:0] ZeroCnt  // the number of leading zeroes
);

  integer i;  // number of zero bits seen so far, scanning from the MSB down

  always_comb begin
    i = 0;
    // search for the leading one
    // - `&&` short-circuits, so num[WIDTH-1-i] is not evaluated once i reaches
    //   WIDTH (the bitwise `&` form read the out-of-range bit num[-1])
    while ((i < WIDTH) && !num[WIDTH-1-i]) i = i+1;
    ZeroCnt = i[$clog2(WIDTH)-1:0];
  end
endmodule
|
81
src/fpu/divremsqrt/divremsqrtnormshift.sv
Normal file
81
src/fpu/divremsqrt/divremsqrtnormshift.sv
Normal file
@ -0,0 +1,81 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtnormshift.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: normalization shifter
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// convert shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if necessary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if necessary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is subnormal or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is subnormal:
|
||||
// | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
// fma shift
|
||||
// | 00 | Sm | << LZA output
|
||||
// .
|
||||
// - two extra bits so we can correct for an LZA error of 1 or 2
|
||||
|
||||
// divsqrt shift
|
||||
// | Nf 0's | Qm | << calculated shift amount
|
||||
// .
|
||||
|
||||
// Normalization shifter: logical left shift of ShiftIn by ShiftAmt.
// The result keeps the input width, so bits shifted past the MSB are lost
// and zeros enter at the LSB.
module divremsqrtnormshift import cvw::*;  #(parameter cvw_t P) (
  input  logic [P.LOGNORMSHIFTSZDRSU-1:0] ShiftAmt,  // shift amount
  input  logic [P.NORMSHIFTSZDRSU-1:0]    ShiftIn,   // number to be shifted
  output logic [P.NORMSHIFTSZDRSU-1:0]    Shifted    // shifted result
);

  always_comb
    Shifted = ShiftIn << ShiftAmt;

endmodule
|
177
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
177
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
@ -0,0 +1,177 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtpostprocess.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Post-processing for the divide/sqrt unit: normalization shift, shift
// correction, rounding, flag generation and special-case result selection.
// Every submodule below is instantiated with .DivOp(1'b1).
//
// NOTE(review): nothing in this module drives PreIntResultM, and the inputs
// IntNormShiftM, PreResultM, IntDivM, PostProcSel and OpCtrl[3:1] are not read.
// The integer result path appears not to be ported yet - confirm before
// relying on PreIntResultM.
module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P) (
  // general signals
  input  logic                     Xs, Ys,          // input signs
  input  logic [P.NF:0]            Xm, Ym,          // input mantissas
  input  logic [2:0]               Frm,             // rounding mode: 000 = round to nearest, ties to even; 001 = round towards zero; 010 = round down; 011 = round up; 100 = round to nearest, ties to max magnitude
  input  logic [P.FMTBITS-1:0]     Fmt,             // precision select; used directly as the output format
  input  logic [3:0]               OpCtrl,          // operation select; only bit 0 (sqrt) is used here
  input  logic                     XZero, YZero,    // inputs are zero
  input  logic                     XInf, YInf,      // inputs are infinity
  input  logic                     XNaN, YNaN,      // inputs are NaN
  input  logic                     XSNaN, YSNaN,    // inputs are signaling NaNs
  input  logic [1:0]               PostProcSel,     // select result to be written to fp register (not read in this module)
  // divide signals
  input  logic                     DivSticky,       // divider sticky bit
  input  logic [P.NE+1:0]          DivUe,           // divsqrt exponent
  input  logic [P.NF+2:0]          DivUm,           // divsqrt significand
  input  logic [P.DIVBLEN-1:0]     IntNormShiftM,   // integer normalization left-shift amount (not read in this module)
  input  logic [P.INTDIVb+3:0]     PreResultM,      // integer result to be shifted (not read in this module)
  input  logic                     IntDivM,         // not read in this module
  // final results
  output logic [P.FLEN-1:0]        PostProcRes,     // postprocessor final result
  output logic [4:0]               PostProcFlg,     // postprocessor flags
  output logic [P.XLEN-1:0]        PreIntResultM    // normalized integer result (not driven in this module)
);

  // general signals
  logic                                Rs;                  // result sign
  logic [P.NF-1:0]                     Rf;                  // result fraction
  logic [P.NE-1:0]                     Re;                  // result exponent
  logic                                Ms;                  // normalized sign
  logic [P.NORMSHIFTSZDRSU-1:0]        Mf;                  // normalized fraction
  logic [P.NE+1:0]                     Me;                  // normalized exponent
  logic [P.NE+1:0]                     FullRe;              // Re with bits to determine sign and overflow
  logic                                UfPlus1;             // add one? (for determining the underflow flag)
  logic [P.LOGNORMSHIFTSZDRSU-1:0]     ShiftAmt;            // normalization shift amount
  logic [P.NORMSHIFTSZDRSU-1:0]        ShiftIn;             // input to the normalization shift
  logic [P.NORMSHIFTSZDRSU-1:0]        Shifted;             // output of the normalization shifter (before shift correction)
  logic                                Plus1;               // add one to the final result?
  logic                                Overflow;            // overflow flag used to select results
  logic                                Invalid;             // invalid flag used to select results
  logic                                Guard, Round, Sticky;// bits needed to determine rounding
  logic [P.FMTBITS-1:0]                OutFmt;              // output format
  // division signals
  logic [P.LOGNORMSHIFTSZDRSU-1:0]     DivShiftAmt;         // divsqrt shift amount
  logic [P.NORMSHIFTSZDRSU-1:0]        DivShiftIn;          // divsqrt shift input
  logic [P.NE+1:0]                     Ue;                  // divsqrt exponent after the correction shift
  logic                                DivByZero;           // divide by zero flag
  logic                                DivResSubnorm;       // is the divsqrt result subnormal
  logic                                DivSubnormShiftPos;  // is the divsqrt subnormal shift amount positive (not underflowed)
  // readability signals
  logic                                Sqrt;                // is the divsqrt operation a sqrt
  logic                                InfIn;               // are any of the inputs infinity
  logic                                NaNIn;               // are any of the inputs NaN

  // signals to help readability
  assign Sqrt  = OpCtrl[0];

  // is there an input of infinity or NaN being used
  assign InfIn = XInf|YInf;
  assign NaNIn = XNaN|YNaN;

  // the output format is the input format
  // - OutFmt is left undriven when P.FPSIZES == 1
  if (P.FPSIZES == 2 | P.FPSIZES == 3 | P.FPSIZES == 4)
    assign OutFmt = Fmt;

  ///////////////////////////////////////////////////////////////////////////////
  // Normalization
  ///////////////////////////////////////////////////////////////////////////////

  // final calculations before shifting
  divremsqrtdivshiftcalc #(P) divremsqrtdivshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos,
                                                     .DivShiftAmt, .DivShiftIn);

  // the divsqrt shift is the only shift source in this unit
  assign ShiftAmt = DivShiftAmt;
  assign ShiftIn  = DivShiftIn;

  // main normalization shift
  divremsqrtnormshift #(P) divremsqrtnormshift (.ShiftIn, .ShiftAmt, .Shifted);

  // correct the shifted result and exponent
  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp(1'b1),
                                                 .DivUe, .Ue, .Shifted, .Mf);

  ///////////////////////////////////////////////////////////////////////////////
  // Rounding
  ///////////////////////////////////////////////////////////////////////////////

  // calculate the result sign used in the rounding unit
  divremsqrtroundsign #(P) roundsign(.DivOp(1'b1), .Sqrt, .Xs, .Ys, .Ms);

  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Ue,
                             .Ms, .Mf, .DivSticky, .DivOp(1'b1), .UfPlus1, .FullRe,
                             .Rf, .Re, .Sticky, .Round, .Guard, .Me);

  ///////////////////////////////////////////////////////////////////////////////
  // Sign calculation
  ///////////////////////////////////////////////////////////////////////////////

  // the result sign is the sign used for rounding
  assign Rs = Ms;

  ///////////////////////////////////////////////////////////////////////////////
  // Flags
  ///////////////////////////////////////////////////////////////////////////////

  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
                             .Xs, .OutFmt, .Sqrt,
                             .NaNIn, .Round, .DivByZero,
                             .Guard, .Sticky, .UfPlus1, .DivOp(1'b1), .FullRe, .Plus1,
                             .Me, .Invalid, .Overflow, .PostProcFlg);

  ///////////////////////////////////////////////////////////////////////////////
  // Select the result
  ///////////////////////////////////////////////////////////////////////////////

  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
                                         .Frm, .OutFmt, .XNaN, .YNaN,
                                         .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
                                         .XInf, .YInf, .DivOp(1'b1), .DivByZero, .FullRe,
                                         .Rs, .Re, .Rf, .PostProcRes);

endmodule
|
267
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
267
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
@ -0,0 +1,267 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtround.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu, me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Rounder
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// Rounder for the divide/sqrt post-processor.
//
// From the normalized fraction Mf and exponent Ue this module
//   - extracts the result LSB, guard and round bits for the selected format,
//   - forms the sticky bit from the lower bits of Mf and the divider sticky,
//   - decides from the rounding mode whether one ulp is added (Plus1, and
//     UfPlus1 for the underflow flag),
//   - adds that ulp at the LSB position of the selected format to
//     {exponent, fraction}, so a fraction carry-out increments the exponent.
//
// Bit positions in Mf for a format with NFx fraction bits
// (NS = P.NORMSHIFTSZDRSU):
//   result LSB  Mf[NS-NFx]
//   guard       Mf[NS-NFx-1]
//   round       Mf[NS-NFx-2]
//   sticky      OR of Mf[NS-NFx-2:0] (this range starts at the round bit)
module divremsqrtround import cvw::*;  #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0]          OutFmt,     // output format
  input  logic [2:0]                    Frm,        // rounding mode
  input  logic                          Ms,         // normalized sign
  input  logic [P.NORMSHIFTSZDRSU-1:0]  Mf,         // normalized fraction
  // divsqrt
  input  logic                          DivOp,      // is a division operation being done
  input  logic                          DivSticky,  // divsqrt sticky bit
  input  logic [P.NE+1:0]               Ue,         // the divsqrt calculated exponent
  // outputs
  output logic [P.NE+1:0]               Me,         // normalized exponent (copy of Ue)
  output logic                          UfPlus1,    // add one to the result if given an unbounded exponent?
  output logic [P.NE+1:0]               FullRe,     // Re with bits to determine sign and overflow
  output logic [P.NE-1:0]               Re,         // result exponent
  output logic [P.NF-1:0]               Rf,         // result fraction
  output logic                          Sticky,     // sticky bit
  output logic                          Plus1,      // add one to the final result?
  output logic                          Round, Guard // bits needed to calculate rounding
);

  logic                UfCalcPlus1;      // calculated plus one for unbounded exponent
  logic                NormSticky;       // sticky bit from the normalized fraction
  logic [P.NF-1:0]     RoundFrac;        // fraction bits kept for rounding
  logic                FpGuard, FpRound; // floating point round/guard bits
  logic                FpLsbRes;         // least significant bit of floating point result
  logic                LsbRes;           // lsb of result
  logic                CalcPlus1;        // calculated plus1
  logic                FpPlus1;          // add one to the fp result?
  logic [P.FLEN:0]     RoundAdd;         // how much to add to the result

  ///////////////////////////////////////////////////////////////////////////////
  // Sticky
  ///////////////////////////////////////////////////////////////////////////////

  // OR together every bit of Mf from the round position of the selected
  // format down to bit 0
  // - the slice below the largest format is always included
  // - slices between formats are included only for the narrower formats
  if (P.FPSIZES == 1) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 2) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~OutFmt)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 3) begin
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.NF2-2:P.NORMSHIFTSZDRSU-P.NF1-1]&(OutFmt==P.FMT2)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF1-2:P.NORMSHIFTSZDRSU-P.NF-1]&(~(OutFmt==P.FMT))) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.NF-2:0]);

  end else if (P.FPSIZES == 4) begin
    // the lowest slice is not gated by the format: it lies below the round
    // bit of every format, matching the FPSIZES == 2 and == 3 branches
    assign NormSticky = (|Mf[P.NORMSHIFTSZDRSU-P.H_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.H_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.S_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.S_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.D_NF-2:P.NORMSHIFTSZDRSU-P.Q_NF-1]&(OutFmt==P.D_FMT)) |
                        (|Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2:0]);
  end

  // combine with the divider's sticky bit (only for divsqrt operations)
  assign Sticky = DivSticky&DivOp | NormSticky;

  ///////////////////////////////////////////////////////////////////////////////
  // Guard, round and LSB of the result
  ///////////////////////////////////////////////////////////////////////////////

  // - the round bit is used to determine the underflow flag
  if (P.FPSIZES == 1) begin
    assign FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
    assign FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
    assign FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];

  end else if (P.FPSIZES == 2) begin
    assign FpGuard  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-1] : Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
    assign FpLsbRes = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF]   : Mf[P.NORMSHIFTSZDRSU-P.NF1];
    assign FpRound  = OutFmt ? Mf[P.NORMSHIFTSZDRSU-P.NF-2] : Mf[P.NORMSHIFTSZDRSU-P.NF1-2];

  end else if (P.FPSIZES == 3) begin
    always_comb
      case (OutFmt)
        P.FMT: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF-2];
        end
        P.FMT1: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF1-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF1];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF1-2];
        end
        P.FMT2: begin
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.NF2-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.NF2];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.NF2-2];
        end
        default: begin
          FpGuard  = 1'bx;
          FpLsbRes = 1'bx;
          FpRound  = 1'bx;
        end
      endcase

  end else if (P.FPSIZES == 4) begin
    always_comb
      case (OutFmt)
        2'h3: begin   // uses the Q_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.Q_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.Q_NF-2];
        end
        2'h1: begin   // uses the D_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.D_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.D_NF-2];
        end
        2'h0: begin   // uses the S_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.S_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.S_NF-2];
        end
        2'h2: begin   // uses the H_NF positions
          FpGuard  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-1];
          FpLsbRes = Mf[P.NORMSHIFTSZDRSU-P.H_NF];
          FpRound  = Mf[P.NORMSHIFTSZDRSU-P.H_NF-2];
        end
      endcase
  end

  assign Guard  = FpGuard;
  assign LsbRes = FpLsbRes;
  assign Round  = FpRound;

  ///////////////////////////////////////////////////////////////////////////////
  // Rounding decision
  ///////////////////////////////////////////////////////////////////////////////

  always_comb begin
    // Determine if you add 1
    case (Frm)
      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes); // round to nearest even
      3'b001: CalcPlus1 = 0;                             // round to zero
      3'b010: CalcPlus1 = Ms;                            // round down
      3'b011: CalcPlus1 = ~Ms;                           // round up
      3'b100: CalcPlus1 = Guard;                         // round to nearest max magnitude
      default: CalcPlus1 = 1'bx;
    endcase
    // Determine if you add 1 (for underflow flag)
    // - same decision taken one bit lower: Round stands in for Guard
    case (Frm)
      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);      // round to nearest even
      3'b001: UfCalcPlus1 = 0;                           // round to zero
      3'b010: UfCalcPlus1 = Ms;                          // round down
      3'b011: UfCalcPlus1 = ~Ms;                         // round up
      3'b100: UfCalcPlus1 = Round;                       // round to nearest max magnitude
      default: UfCalcPlus1 = 1'bx;
    endcase
  end

  // If an answer is exact don't round
  assign Plus1   = CalcPlus1 & (Sticky|Round|Guard);
  assign FpPlus1 = Plus1;
  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);

  ///////////////////////////////////////////////////////////////////////////////
  // Apply the rounding
  ///////////////////////////////////////////////////////////////////////////////

  // place Plus1 into the LSB position of the selected format
  // - RoundAdd is FLEN+1 bits wide, matching {Me, RoundFrac}
  if (P.FPSIZES == 1) begin
    assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};

  end else if (P.FPSIZES == 2) begin
    //      \/FLEN+1
    //  | NE+2 |        NF      |
    //  '-NE+2-^----NF1----^
    assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};

  end else if (P.FPSIZES == 3) begin
    assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2),
                       (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1),
                       (P.NF-P.NF1-1)'(0),  FpPlus1&(OutFmt==P.FMT)};

  end else if (P.FPSIZES == 4)
    assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT),
                       (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT),
                       (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT),
                       (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};

  // trim unneeded bits from fraction
  assign RoundFrac = Mf[P.NORMSHIFTSZDRSU-1:P.NORMSHIFTSZDRSU-P.NF];

  // select the exponent
  assign Me = Ue;

  // round the result
  // - if the fraction overflows, the carry adds one to the exponent
  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
  assign Re = FullRe[P.NE-1:0];

endmodule
|
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
@ -0,0 +1,45 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtroundsign.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Sign calculation for rounding
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Sign used by the rounding unit.
// The divsqrt result sign is Xs xor Ys, with Ys masked off when Sqrt is set;
// the output is forced to 0 when DivOp is low.
module divremsqrtroundsign import cvw::*;  #(parameter cvw_t P) (
  input  logic Xs,     // x sign
  input  logic Ys,     // y sign
  input  logic Sqrt,   // sqrt operation? (when using the divsqrt unit)
  input  logic DivOp,  // is divsqrt operation
  output logic Ms      // normalized result sign
);

  // divsqrt sign, gated by the operation select
  assign Ms = DivOp & (Xs ^ (Ys & ~Sqrt));

endmodule
|
94
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
94
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
@ -0,0 +1,94 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtshiftcorrection.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: shift correction
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// One-bit shift correction of the divsqrt significand and the matching
// exponent adjustment. The FMA paths of the shared post-processing shift
// correction are not part of this divsqrt-only module.
module divremsqrtshiftcorrection import cvw::*;  #(parameter cvw_t P) (
  input  logic [P.NORMSHIFTSZDRSU-1:0] Shifted,            // shifter output before the correction
  // divsqrt
  input  logic                         DivOp,              // is it a divsqrt operation (not referenced in this module)
  input  logic                         DivResSubnorm,      // is the divsqrt result subnormal
  input  logic [P.NE+1:0]              DivUe,              // the divsqrt result's exponent
  input  logic                         DivSubnormShiftPos, // is the subnormal divider shift amount positive (i.e. not underflowed)
  // output
  output logic [P.NORMSHIFTSZDRSU-1:0] Mf,                 // corrected significand
  output logic [P.NE+1:0]              Ue                  // corrected exponent for divider
);

  logic [P.NORMSHIFTSZDRSU-1:0] CorrQm0, CorrQm1;  // Shifted moved left by two bits / by one bit
  logic [P.NORMSHIFTSZDRSU-1:0] CorrQmShifted;     // the divsqrt result after the selected left shift
  logic                         LZAPlus1;          // most significant bit of Shifted
  logic                         LeftShiftQm;       // select input of divcorrmux

  assign LZAPlus1 = Shifted[P.NORMSHIFTSZDRSU-1];

  // Correct the shifting of the divsqrt result, which is produced in the range [.5, 2).
  // Condition: the msb is 1, or the exponent is one but the shifted quotient is < 1 (subnormal).
  assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
  assign CorrQm0     = {Shifted[P.NORMSHIFTSZDRSU-3:0],{2'b00}};
  assign CorrQm1     = {Shifted[P.NORMSHIFTSZDRSU-2:0],{1'b0}};
  // NOTE(review): assumes mux2 ports are (d0, d1, s, y) with y = s ? d1 : d0 - confirm against mux2.
  mux2 #(P.NORMSHIFTSZDRSU) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);

  // If the divider result was calculated to be subnormal, it was already correctly
  // normalized, so pass Shifted through unchanged.
  always_comb
    if (~DivResSubnorm) Mf = CorrQmShifted;
    else                Mf = Shifted[P.NORMSHIFTSZDRSU-1:0];

  // The quotient is in the range [.5, 2) if there is no early termination.
  // If the quotient is < 1 (msb clear), subtract 1 to account for the normalization shift.
  // The exponent is forced to zero for a subnormal result with a positive shift amount.
  assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};

endmodule
|
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
@ -0,0 +1,240 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtspecialcase.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: special case selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Builds the candidate results (NaN, invalid, overflow, underflow, normal) for
// each supported output format and selects the one that becomes PostProcRes.
module divremsqrtspecialcase import cvw::*;  #(parameter cvw_t P) (
  input  logic                 Xs,                 // X sign (not referenced in this module)
  input  logic [P.NF:0]        Xm, Ym,             // input significands
  input  logic                 XNaN, YNaN,         // is X / Y a NaN
  input  logic [2:0]           Frm,                // rounding mode
  input  logic [P.FMTBITS-1:0] OutFmt,             // output format
  input  logic                 InfIn,              // is any input infinity
  input  logic                 NaNIn,              // is any input a NaN
  input  logic                 XInf, YInf,         // is X / Y infinity
  input  logic                 XZero,              // is X zero
  input  logic                 Plus1,              // does rounding add one
  input  logic                 Rs,                 // result sign
  input  logic                 Invalid, Overflow,  // flags used to choose the result
  input  logic [P.NE-1:0]      Re,                 // result exponent
  input  logic [P.NE+1:0]      FullRe,             // full-width result exponent
  input  logic [P.NF-1:0]      Rf,                 // result fraction
  // divsqrt
  input  logic                 DivOp,              // is it a divsqrt operation
  input  logic                 DivByZero,          // divide-by-zero flag
  // outputs
  output logic [P.FLEN-1:0]    PostProcRes         // final result
);

  logic [P.FLEN-1:0] XNaNRes;     // result when X is a NaN
  logic [P.FLEN-1:0] YNaNRes;     // result when Y is a NaN
  logic [P.FLEN-1:0] InvalidRes;  // result for an invalid operation
  logic [P.FLEN-1:0] UfRes;       // result on underflow
  logic [P.FLEN-1:0] OfRes;       // result on overflow
  logic [P.FLEN-1:0] NormRes;     // ordinary result
  logic              OfResMax;    // overflow result is the largest normal number rather than infinity
  logic              UfLsb;       // least significant bit of the underflow result
  logic              KillRes;     // replace the result with UfRes
  logic              SelOfRes;    // select OfRes

  // The overflow result is the maximum normal number only for finite inputs without a
  // divide by zero, and only for Frm[1:0]==01, Frm[1:0]==10 with a positive result,
  // or Frm[1:0]==11 with a negative result. Otherwise it is infinity.
  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));

  // Zero is exact when dividing by infinity, so the underflow result does not add one then.
  assign UfLsb = Plus1&Frm[1]&~(DivOp&YInf);

  // Candidate results for each configuration of supported formats.
  // Narrower formats are padded with ones in the upper FLEN-LEN bits.
  if (P.FPSIZES == 1) begin

    assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
    // NaN payloads are only produced when following IEEE 754
    if (P.IEEE754) begin
      assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
      assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
    end

    assign OfRes   = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
    assign UfRes   = {Rs, {P.FLEN-2{1'b0}}, UfLsb};
    assign NormRes = {Rs, Re, Rf};

  end else if (P.FPSIZES == 2) begin

    assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
    if (P.IEEE754) begin
      assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
      assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
    end

    always_comb
      if (OutFmt) begin
        if (OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
        else          OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
      end else begin
        if (OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
        else          OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
      end

    assign UfRes   = OutFmt ? {Rs, (P.FLEN-2)'(0), UfLsb} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), UfLsb};
    assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};

  end else if (P.FPSIZES == 3) begin

    always_comb
      case (OutFmt)
        P.FMT: begin
          InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
          if (P.IEEE754) begin
            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
          end
          OfRes   = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
          UfRes   = {Rs, (P.FLEN-2)'(0), UfLsb};
          NormRes = {Rs, Re, Rf};
        end
        P.FMT1: begin
          InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
          if (P.IEEE754) begin
            XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
            YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
          end
          OfRes   = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
          UfRes   = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), UfLsb};
          NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
        end
        P.FMT2: begin
          InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
          if (P.IEEE754) begin
            XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
            YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
          end
          OfRes   = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
          UfRes   = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), UfLsb};
          NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
        end
        default: begin
          // unsupported format encoding: every candidate is zero
          InvalidRes = (P.FLEN)'(0);
          if (P.IEEE754) begin
            XNaNRes = (P.FLEN)'(0);
            YNaNRes = (P.FLEN)'(0);
          end
          OfRes   = (P.FLEN)'(0);
          UfRes   = (P.FLEN)'(0);
          NormRes = (P.FLEN)'(0);
        end
      endcase

  end else if (P.FPSIZES == 4) begin

    always_comb
      case (OutFmt)
        2'h3: begin
          InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
          if (P.IEEE754) begin
            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
          end
          OfRes   = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
          UfRes   = {Rs, (P.FLEN-2)'(0), UfLsb};
          NormRes = {Rs, Re, Rf};
        end
        2'h1: begin
          InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
          if (P.IEEE754) begin
            XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
            YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
          end
          OfRes   = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
          UfRes   = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), UfLsb};
          NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
        end
        2'h0: begin
          InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
          if (P.IEEE754) begin
            XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
            YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
          end
          OfRes   = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
          UfRes   = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), UfLsb};
          NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
        end
        2'h2: begin
          InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
          if (P.IEEE754) begin
            XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
            YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
          end
          OfRes   = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
          UfRes   = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), UfLsb};
          NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
        end
      endcase

  end

  // Replace the result with UfRes when the top bit of FullRe is set, or when a divsqrt
  // operation has (Y infinite and X not infinite) or X zero.
  // NOTE(review): the top bit of FullRe is presumably the sign of the full exponent - confirm.
  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);

  // Select the overflow result on overflow, divide by zero, or an infinite input
  // (except when Y is infinite in a divsqrt operation).
  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));

  // Final priority selection. With IEEE 754 NaN handling the NaN operand is propagated;
  // otherwise any NaN input yields the same result as an invalid operation.
  if (P.IEEE754)
    always_comb
      if      (XNaN)     PostProcRes = XNaNRes;
      else if (YNaN)     PostProcRes = YNaNRes;
      else if (Invalid)  PostProcRes = InvalidRes;
      else if (SelOfRes) PostProcRes = OfRes;
      else if (KillRes)  PostProcRes = UfRes;
      else               PostProcRes = NormRes;
  else
    always_comb
      if      (NaNIn|Invalid) PostProcRes = InvalidRes;
      else if (SelOfRes)      PostProcRes = OfRes;
      else if (KillRes)       PostProcRes = UfRes;
      else                    PostProcRes = NormRes;

endmodule
|
102
src/fpu/divremsqrt/drsu.sv
Normal file
102
src/fpu/divremsqrt/drsu.sv
Normal file
@ -0,0 +1,102 @@
|
||||
///////////////////////////////////////////
|
||||
// drsu.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Wrapper that connects the divremsqrt unit to the divremsqrtpostprocess unit.
// Floating-point division and square root, with optional integer division and remainder:
// computes X/Y, sqrt(X), A/B, or A%B.
module drsu import cvw::*;  #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 reset,
  input  logic [P.FMTBITS-1:0] FmtE,                           // format, passed to both sub-units
  input  logic                 XsE, YsE,                       // operand signs
  input  logic [P.NF:0]        XmE, YmE,                       // operand significands
  input  logic [P.NE-1:0]      XeE, YeE,                       // operand exponents
  input  logic                 XInfE, YInfE,                   // operand is infinity
  input  logic                 XZeroE, YZeroE,                 // operand is zero
  input  logic                 XNaNE, YNaNE,                   // operand is NaN
  input  logic                 XSNaNE, YSNaNE,                 // operand is a signaling NaN (post-processing only)
  input  logic                 FDivStartE, IDivStartE,
  input  logic                 StallM,
  input  logic                 FlushE,
  input  logic                 SqrtE, SqrtM,
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic [2:0]           Funct3E, Funct3M,
  input  logic                 IntDivE, W64E,
  input  logic [2:0]           Frm,                            // rounding mode (post-processing only)
  input  logic [3:0]           OpCtrl,                         // post-processing only
  input  logic [1:0]           PostProcSel,                    // post-processing only
  output logic                 FDivBusyE, IFDivStartE, FDivDoneE,
  output logic [P.FLEN-1:0]    FResM,                          // post-processed result
  output logic [P.XLEN-1:0]    FIntDivResultM,                 // driven by divremsqrt
  output logic [4:0]           FlgM                            // post-processed flags
);

  // Signals passed from divremsqrt to divremsqrtpostprocess
  logic                   IntDivM;        // Integer operation
  logic [P.DIVb:0]        UmM;            // divremsqrt significand result
  logic [P.NF+2:0]        UmMexact;       // U1.NF+2: top bits of UmM
  logic [P.NE+1:0]        UeM;            // divremsqrt exponent result
  logic                   DivStickyM;     // sticky bit
  logic [P.INTDIVb+3:0]   PreResultM;
  logic [P.XLEN-1:0]      PreIntResultM;
  logic [P.DIVBLEN-1:0]   IntNormShiftM;

  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
                  .XeE, .YeE, .SqrtE, .SqrtM,
                  .XInfE, .YInfE, .XZeroE, .YZeroE,
                  .XNaNE, .YNaNE,
                  .FDivStartE, .IDivStartE, .W64E,
                  .StallM, .DivStickyM, .FDivBusyE, .UeM,
                  .UmM,
                  .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
                  .Funct3E, .IntDivE, .FIntDivResultM, .IntDivM,
                  .FDivDoneE, .IFDivStartE, .IntNormShiftM, .PreIntResultM, .PreResultM);

  assign UmMexact = UmM[P.DIVb:P.DIVb-(P.NF+3-1)]; // grabbing top 1+(NF+2) msbs

  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl, .IntDivM,
                  .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
                  .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivUe(UeM), .DivUm(UmMexact), .PostProcRes(FResM), .PostProcFlg(FlgM),
                  .PreIntResultM, .PreResultM, .IntNormShiftM);

endmodule
|
||||
|
37
src/fpu/divremsqrt/intrightshift.sv
Normal file
37
src/fpu/divremsqrt/intrightshift.sv
Normal file
@ -0,0 +1,37 @@
|
||||
///////////////////////////////////////////
|
||||
// intrightshift.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Right shifter for the integer result in divide/square root postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Right shift of a signed value by a variable amount.
module intrightshift import cvw::*;  #(parameter cvw_t P) (
  input  logic signed [P.INTDIVb+3:0] shiftin,   // signed value to shift
  input  logic [P.DIVBLEN-1:0]        shiftamt,  // number of bit positions to shift right
  output logic signed [P.INTDIVb+3:0] shifted    // shiftin shifted right by shiftamt, sign-extended
);

  // Arithmetic shift: ">>" zero-fills even when the operand is signed, which
  // would corrupt negative values; ">>>" replicates the sign bit.
  assign shifted = shiftin >>> shiftamt;

endmodule
|
1682
testbench/testbench-fp.sv
Normal file
1682
testbench/testbench-fp.sv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,7 @@
|
||||
#!/bin/sh
# create test vectors for stand alone int

# -p: do not fail when IF_vectors already exists from a previous run
mkdir -p IF_vectors
./extract_testfloat_vectors.py
./extract_arch_vectors.py
cp IF_vectors/* ../vectors
|
||||
|
Loading…
Reference in New Issue
Block a user