From b815f17560004993f80cd529047ed9a4cddc05a4 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Tue, 20 Feb 2024 17:16:29 -0800 Subject: [PATCH 01/19] regression-wally handles softfloat --- sim/regression-wally | 114 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 10 deletions(-) diff --git a/sim/regression-wally b/sim/regression-wally index d06ac0b28..61f133fa9 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -33,6 +33,7 @@ os.chdir(regressionDir) coverage = '-coverage' in sys.argv fp = '-fp' in sys.argv nightly = '-nightly' in sys.argv +softfloat = '-softfloat' in sys.argv TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) # name: the name of this test configuration (used in printing human-readable @@ -161,6 +162,45 @@ for test in tests64gc: # run derivative configurations if requested if (nightly): + derivconfigtests = [ + ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], + ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + + + ["f_rv32gc", ["arch32f", "arch32f_divsqrt"]], + ["fh_rv32gc", ["arch32f", "arch32f_divsqrt"]], + ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt" ]], + ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt"]], + ["f_rv64gc", ["arch64f", "arch64f_divsqrt"]], + ["fh_rv64gc", ["arch64f", "arch64f_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed + ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], + ] + """ derivconfigtests = [ ["tlb2_rv32gc", ["wally32priv"]], ["tlb16_rv32gc", ["wally32priv"]], @@ -269,16 +309,16 @@ if (nightly): # enable floating-point tests when lint is fixed -# ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], -# ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], -# ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], -# ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], -# ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], -# ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], -# ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed -# ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], -# ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], -# ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], + ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], + ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed + ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], ] @@ -309,6 +349,58 @@ for test in tests32e: grepstr="All tests ran without failures") configs.append(tc) + +# softfloat tests +if (softfloat): + configs = [] + softfloatconfigs = ['fdh_ieee_rv32gc', 'fdqh_ieee_rv32gc', 'fdq_ieee_rv32gc', \ + 'fh_ieee_v32gc', 'f_ieee_rv64gc', 'fdqh_ieee_rv64gc', \ + 'fdq_ieee_rv64gc', 'div_2_1_rv32gc', 'div_2_2_rv32gc', \ + 'div_2_4_rv32gc', 'div_4_1_rv32gc', 'div_4_2_rv32gc', \ + 'div_4_4_rv32gc', 'fd_ieee_rv32gc', 'fh_ieee_rv32gc', \ + 'div_2_1_rv64gc', 'div_2_2_rv64gc', 'div_2_4_rv64gc', \ + 'div_4_1_rv64gc', 'div_4_2_rv64gc', 'div_4_4_rv64gc', \ + 'fd_ieee_rv64gc', 'fh_ieee_rv64gc', 'f_ieee_rv32gc'] + for config in softfloatconfigs: + # div test case + divtest = TestCase( + name="div", + variant=config, + cmd="vsim > {} -c < {} -c < {} -c < {} -c < Date: Tue, 20 Feb 2024 17:17:45 -0800 Subject: [PATCH 02/19] typo fix --- sim/regression-wally | 1 - 1 file changed, 1 deletion(-) diff --git a/sim/regression-wally b/sim/regression-wally index 61f133fa9..b721d8899 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -200,7 +200,6 @@ if (nightly): ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], ] - """ derivconfigtests = [ ["tlb2_rv32gc", ["wally32priv"]], ["tlb16_rv32gc", ["wally32priv"]], From c8ff1bddec4bb87aae060c9b9da6094871bff55b Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Tue, 20 Feb 2024 17:21:29 -0800 Subject: [PATCH 03/19] formatting --- addins/riscv-arch-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 8a52b016d..c955abf75 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 8a52b016dbe1e2733cc168b9d6e5c93e39059d4d +Subproject commit c955abf757df98cf38809e40a62d2a6b448ea507 From 7e3df23f28c202a8eaf0aaa67e539e1c32c45fc4 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Tue, 20 Feb 2024 17:24:04 -0800 Subject: [PATCH 04/19] Revert "formatting" This reverts commit c8ff1bddec4bb87aae060c9b9da6094871bff55b. --- addins/riscv-arch-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index c955abf75..8a52b016d 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit c955abf757df98cf38809e40a62d2a6b448ea507 +Subproject commit 8a52b016dbe1e2733cc168b9d6e5c93e39059d4d From 19a61e301ea4a1aea3382225b6fdd017c054d088 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Tue, 20 Feb 2024 17:24:15 -0800 Subject: [PATCH 05/19] formatting --- sim/regression-wally | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/sim/regression-wally b/sim/regression-wally index b721d8899..28c2e9a7a 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -162,44 +162,6 @@ for test in tests64gc: # run derivative configurations if requested if (nightly): - derivconfigtests = [ - ["div_2_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_1_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_1i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_2_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_2i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_4_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_4_4i_rv32gc", ["arch32f_divsqrt", "arch32d_divsqrt", "arch32m"]], - ["div_2_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_2_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_2_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_2_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_2_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_2_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_1_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_1i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - - - ["f_rv32gc", ["arch32f", "arch32f_divsqrt"]], - ["fh_rv32gc", ["arch32f", "arch32f_divsqrt"]], - ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt"]], - ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt" ]], - ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32d", "arch32d_divsqrt"]], - ["f_rv64gc", ["arch64f", "arch64f_divsqrt"]], - ["fh_rv64gc", ["arch64f", "arch64f_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed - ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], - ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], - ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64d", "arch64d_divsqrt"]], - ] derivconfigtests = [ ["tlb2_rv32gc", ["wally32priv"]], ["tlb16_rv32gc", ["wally32priv"]], @@ -306,7 +268,6 @@ if (nightly): ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - # enable floating-point tests when lint is fixed ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], From 02081cac409fec665c0ed0de29648f8dbbca4fb4 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Wed, 21 Feb 2024 20:49:38 -0800 Subject: [PATCH 06/19] softfloat jobs now run concurrently with help of testfloat-batch.do directing compiled designs into individual folders for each config/test --- sim/regression-wally | 15 +++++++----- sim/testfloat-batch.do | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 sim/testfloat-batch.do diff --git a/sim/regression-wally b/sim/regression-wally index 28c2e9a7a..e53ebd0d8 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -326,7 +326,7 @@ if (softfloat): divtest = TestCase( name="div", variant=config, - cmd="vsim > {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < {} -c < Date: Thu, 22 Feb 2024 10:22:23 -0800 Subject: [PATCH 07/19] updated configs list in regression-wally --- sim/regression-wally | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/sim/regression-wally b/sim/regression-wally index e53ebd0d8..ad1720004 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -321,6 +321,32 @@ if (softfloat): 'div_2_1_rv64gc', 'div_2_2_rv64gc', 'div_2_4_rv64gc', \ 'div_4_1_rv64gc', 'div_4_2_rv64gc', 'div_4_4_rv64gc', \ 'fd_ieee_rv64gc', 'fh_ieee_rv64gc', 'f_ieee_rv32gc'] + softfloatconfigs = ['fdh_ieee_div_2_1_rv32gc', 'fdh_ieee_div_2_1_rv64gc', \ + 'fdh_ieee_div_2_2_rv32gc', 'fdh_ieee_div_2_2_rv64gc', 'fdh_ieee_div_2_4_rv32gc', \ + 'fdh_ieee_div_2_4_rv64gc', 'fdh_ieee_div_4_1_rv32gc', 'fdh_ieee_div_4_1_rv64gc', \ + 'fdh_ieee_div_4_2_rv32gc', 'fdh_ieee_div_4_2_rv64gc', 'fdh_ieee_div_4_4_rv64gc', \ + 'fdh_ieee_rv32gc', 'fd_ieee_div_2_1_rv32gc', 'fd_ieee_div_2_1_rv64gc', \ + 'fd_ieee_div_2_2_rv32gc', 'fd_ieee_div_2_2_rv64gc', 'fd_ieee_div_2_4_rv32gc', \ + 'fd_ieee_div_2_4_rv64gc', 'fd_ieee_div_4_1_rv32gc', 'fd_ieee_div_4_1_rv64gc', \ + 'fd_ieee_div_4_2_rv32gc', 'fd_ieee_div_4_2_rv64gc', 'fd_ieee_div_4_4_rv64gc', \ + 'fd_ieee_rv32gc', 'fd_ieee_rv64gc', 'fdqh_ieee_div_2_1_rv32gc', \ + 'fdqh_ieee_div_2_1_rv64gc', 'fdqh_ieee_div_2_2_rv32gc', 'fdqh_ieee_div_2_2_rv64gc', \ + 'fdqh_ieee_div_2_4_rv32gc', 'fdqh_ieee_div_2_4_rv64gc', 'fdqh_ieee_div_4_1_rv32gc', \ + 'fdqh_ieee_div_4_1_rv64gc', 'fdqh_ieee_div_4_2_rv32gc', 'fdqh_ieee_div_4_2_rv64gc',\ + 'fdqh_ieee_div_4_4_rv64gc', 'fdqh_ieee_rv32gc', 'fdqh_ieee_rv64gc', \ + 'fdq_ieee_div_2_1_rv32gc', 'fdq_ieee_div_2_1_rv64gc', 'fdq_ieee_div_2_2_rv32gc',\ + 'fdq_ieee_div_2_2_rv64gc', 'fdq_ieee_div_2_4_rv32gc', 'fdq_ieee_div_2_4_rv64gc', \ + 'fdq_ieee_div_4_1_rv32gc', 'fdq_ieee_div_4_1_rv64gc', 'fdq_ieee_div_4_2_rv32gc', \ + 'fdq_ieee_div_4_2_rv64gc', 'fdq_ieee_div_4_4_rv64gc', 'fdq_ieee_rv32gc', \ + 'fdq_ieee_rv64gc', 'fh_ieee_div_2_1_rv32gc', 'fh_ieee_div_2_1_rv64gc', \ + 'fh_ieee_div_2_2_rv32gc', 'fh_ieee_div_2_2_rv64gc', 'fh_ieee_div_2_4_rv32gc',\ + 'fh_ieee_div_2_4_rv64gc', 'fh_ieee_div_4_1_rv32gc', 'fh_ieee_div_4_1_rv64gc',\ + 'fh_ieee_div_4_2_rv32gc', 'fh_ieee_div_4_2_rv64gc', 'fh_ieee_div_4_4_rv64gc', \ + 'fh_ieee_rv32gc', 'fh_ieee_rv64gc', 'fh_ieee_v32gc', 'f_ieee_div_2_1_rv32gc', \ + 'f_ieee_div_2_1_rv64gc', 'f_ieee_div_2_2_rv32gc', 'f_ieee_div_2_2_rv64gc', \ + 'f_ieee_div_2_4_rv32gc', 'f_ieee_div_2_4_rv64gc', 'f_ieee_div_4_1_rv32gc', \ + 'f_ieee_div_4_1_rv64gc', 'f_ieee_div_4_2_rv32gc', 'f_ieee_div_4_2_rv64gc', \ + 'f_ieee_div_4_4_rv64gc', 'f_ieee_rv32gc', 'f_ieee_rv64gc'] for config in softfloatconfigs: # div test case divtest = TestCase( @@ -329,7 +355,7 @@ if (softfloat): cmd="vsim > {} -c < Date: Thu, 22 Feb 2024 19:40:06 -0800 Subject: [PATCH 08/19] modified synth makefile to handle derived configs --- synthDC/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 7968a7b52..03c3c6612 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -51,7 +51,8 @@ configs: $(CONFIG) $(CONFIG): @echo $(CONFIG) cp -r $(OLDCONFIGDIR)/shared/*.vh $(CONFIGDIR) - cp -r $(OLDCONFIGDIR)/$(CONFIG)/* $(CONFIGDIR) +# cp -r $(OLDCONFIGDIR)/$(CONFIG)/* $(CONFIGDIR) + cp -r $(OLDCONFIGDIR)/deriv/$(CONFIG)/* $(CONFIGDIR) # adjust DTIM and IROM to reasonable values depending on config ifneq ($(filter $(CONFIG), $(DIRS32)),) @@ -61,8 +62,8 @@ else ifneq ($(filter $(CONFIG), $(DIRS64)),) sed -i "s/DTIM_RANGE.*/DTIM_RANGE = 56\'h01FF;/g" $(CONFIGDIR)/config.vh sed -i "s/IROM_RANGE.*/IROM_RANGE = 56\'h01FF;/g" $(CONFIGDIR)/config.vh else - $(info $(CONFIG) does not exist in $(DIRS32) or $(DIRS64)) - @echo "Config not in list, RAM_RANGE will be unmodified" + $(info $(CONFIG) does not exist in $(DIRS32) or $(DIRS64)) + @echo "Config not in list, RAM_RANGE will be unmodified" endif # if USESRAM = 1, set that in the config file, otherwise reduce sizes From 77ccc7b319536e267c0a33713586d6c924b3c989 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sat, 2 Mar 2024 15:55:34 -0800 Subject: [PATCH 09/19] removed square root pre-process muxes --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 1c56e04e5..cc77c47d0 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -174,9 +174,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb:0] PreSqrtX; assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even - mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even - if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1) - else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1) + mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {1'b00, Xnorm[P.DIVb:2]}, EvenExp, PreSqrtX); // X/2 if exponent odd, X/4 if exponent even /* // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift From c45d67f8ba4b9f3b4f1a3dfc874049a620ccb6a9 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sat, 2 Mar 2024 20:29:03 -0800 Subject: [PATCH 10/19] fdivsqrt changes --- src/fpu/fdivsqrt/fdivsqrtiter.sv | 9 +++++---- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 4 ++-- src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv | 25 +++++++++++++++++-------- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 20f88b6cb..30232a232 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -83,7 +83,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( // Initialize C to -1 for sqrt and -R for division logic [1:0] initCUpper; if(P.RADIX == 4) begin - mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper); + mux2 #(2) cuppermux4(2'b00, 2'b00, SqrtE, initCUpper); // *** Remove this soon end else begin mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper); end @@ -108,9 +108,10 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage - logic j1; - assign j1 = (i == 0 & ~C[0][P.DIVb-1]); - fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, + logic j1,j0; + assign j0 = (i == 0 & ~C[0][P.DIVb+1]); + assign j1 = (i == 1 & ~C[0][P.DIVb+1]); + fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .j0, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index cc77c47d0..e81f5c872 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -174,7 +174,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb:0] PreSqrtX; assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even - mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {1'b00, Xnorm[P.DIVb:2]}, EvenExp, PreSqrtX); // X/2 if exponent odd, X/4 if exponent even + mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {1'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even /* // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 0d7a722ff..e7df4399d 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -32,7 +32,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( input logic [P.DIVb:0] U,UM, // U1.DIVb input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb input logic [P.DIVb+1:0] C, // Q2.DIVb - input logic SqrtE, j1, + input logic SqrtE, j1,j0, output logic [P.DIVb+1:0] CNext, // Q2.DIVb output logic un, output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb @@ -54,7 +54,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1 assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual - fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); + fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .j0, .udigit); assign un = 1'b0; // unused for radix 4 // F generation logic diff --git a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index c0cbe9b1c..69571b105 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -31,7 +31,7 @@ module fdivsqrtuslc4cmp ( input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits - input logic SqrtE, j1, + input logic SqrtE, j0, j1, output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); logic [6:0] Wmsbs; @@ -46,7 +46,9 @@ module fdivsqrtuslc4cmp ( // Wmsbs = | | logic [6:0] mk2, mk1, mk0, mkm1; + logic [6:0] mkj2, mkj1, mkj0, mkjm1; logic [6:0] mks2[7:0], mks1[7:0]; + logic sqrtspecial; // Prepopulate table of mks0 assign mks2[0] = 12; @@ -65,20 +67,27 @@ module fdivsqrtuslc4cmp ( assign mks1[5] = 8; // is the logic any cheaper if this is a 6? assign mks1[6] = 8; assign mks1[7] = 8; + + // handles special case when j = 0 or j = 1 for sqrt + assign mkj2 = 20; // when j = 1 use mk2[101] when j = 0 use anything bigger than 7. + assign mkj1 = j1 ? 8 : 0; // when j = 1 use mk1[101] = 8 and when j = 0 use 0 so we choose u_0 = 1 + assign sqrtspecial = SqrtE & (j1 | j0); - // Choose A for current operation + // Choose A for current operation *** Come back to this always_comb if (SqrtE) begin - if (j1) A = 3'b101; - else if (Smsbs == 5'b10000) A = 3'b111; + //if (j1) A = 3'b101; + if (Smsbs == 5'b10000) A = 3'b111; // *** can we get rid of SMSBs case? else A = Smsbs[2:0]; end else A = Dmsbs; + // Choose selection constants based on a - assign mk2 = mks2[A]; - assign mk1 = mks1[A]; - assign mk0 = -mks1[A]; - assign mkm1 = (A == 3'b000) ? -13 : -mks2[A]; // asymmetry in table + + assign mk2 = sqrtspecial ? mkj2 : mks2[A]; + assign mk1 = sqrtspecial ? mkj1 : mks1[A]; + assign mk0 = -mk1; + assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide? // Compare residual W to selection constants to choose digit always_comb From 6c24afaf9898027559166d0cf8a624fc91e888d2 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 3 Mar 2024 10:29:32 -0800 Subject: [PATCH 11/19] changed cycle count to account for integer bit generation for sqrt --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 2649632eb..2239bed40 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -71,7 +71,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 + if (SqrtE) FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; From c32173f163a18437429d518368cea7a626c3c667 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 3 Mar 2024 10:30:18 -0800 Subject: [PATCH 12/19] changed U/C initialization to account for integer bit generation on divider stage for sqrt. Quick and dirty j1 logic fix --- src/fpu/fdivsqrt/fdivsqrtiter.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 30232a232..d3ee9a4f1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -72,8 +72,8 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( // UOTFC Result U and UM registers/initialization mux // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise - assign initU = {SqrtE, {(P.DIVb){1'b0}}}; - assign initUM = {~SqrtE, {(P.DIVb){1'b0}}}; + assign initU ={(P.DIVb+1){1'b0}}; + assign initUM = {{1'b1}, {(P.DIVb){1'b0}}}; mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux); mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux); flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]); @@ -85,7 +85,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( if(P.RADIX == 4) begin mux2 #(2) cuppermux4(2'b00, 2'b00, SqrtE, initCUpper); // *** Remove this soon end else begin - mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper); + mux2 #(2) cuppermux2(2'b10, 2'b10, SqrtE, initCUpper); end assign initC = {initCUpper, {P.DIVb{1'b0}}}; @@ -110,7 +110,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( end else begin: stage logic j1,j0; assign j0 = (i == 0 & ~C[0][P.DIVb+1]); - assign j1 = (i == 1 & ~C[0][P.DIVb+1]); + assign j1 = (i == 1 & ~C[0][P.DIVb+1]) || (i == 0 & (C[0][P.DIVb-1] ^ C[0][P.DIVb])); fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .j0, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); From 2547e4c6d1a9428638946771e5b732b293ec8d32 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 3 Mar 2024 11:17:51 -0800 Subject: [PATCH 13/19] divider still works with NF+2 --- config/shared/config-shared.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index be5543967..dd766f2fd 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -94,7 +94,7 @@ localparam LOGR = $clog2(RADIX); // r = log(R localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated // intermediate division parameters not directly used in fdivsqrt hardware -localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right +localparam FPDIVMINb = NF + 2; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right //localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step. localparam DIVMINb = ((FPDIVMINb Date: Sun, 3 Mar 2024 13:00:20 -0800 Subject: [PATCH 14/19] remove redundant mux --- src/fpu/fdivsqrt/fdivsqrtiter.sv | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index d3ee9a4f1..311565f56 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -80,12 +80,11 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]); // C register/initialization mux - // Initialize C to -1 for sqrt and -R for division logic [1:0] initCUpper; if(P.RADIX == 4) begin - mux2 #(2) cuppermux4(2'b00, 2'b00, SqrtE, initCUpper); // *** Remove this soon + assign initCUpper = 2'b00; end else begin - mux2 #(2) cuppermux2(2'b10, 2'b10, SqrtE, initCUpper); + assign initCUpper = 2'b10; end assign initC = {initCUpper, {P.DIVb{1'b0}}}; From 9c95cba86591a1b77e7a239085d04a4ff0ce0d60 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 3 Mar 2024 18:51:10 -0800 Subject: [PATCH 15/19] remove sqrt cycle muxing --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 2239bed40..72fe04249 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -71,8 +71,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) always_comb begin - if (SqrtE) FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 - else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits + FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE; else ResultBitsE = FPResultBitsE; From 7dec9cdf212bb82e48eed2bd0db2c08ae32491de Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 4 Mar 2024 10:46:16 -0800 Subject: [PATCH 16/19] optimization in uslc --- src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index 69571b105..c8b065f31 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -77,7 +77,8 @@ module fdivsqrtuslc4cmp ( always_comb if (SqrtE) begin //if (j1) A = 3'b101; - if (Smsbs == 5'b10000) A = 3'b111; // *** can we get rid of SMSBs case? + //if (Smsbs == 5'b10000) A = 3'b111; // *** can we get rid of SMSBs case? + if (Smsbs[4]) A = 3'b111; // *** can we get rid of SMSBs case? else A = Smsbs[2:0]; end else A = Dmsbs; From 587fdbdf8eab2accc0c1f0b49b2326221da199da Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 4 Mar 2024 14:30:05 -0800 Subject: [PATCH 17/19] removed j1,j0 from iteration and put inside divider stage --- src/fpu/fdivsqrt/fdivsqrtiter.sv | 5 +---- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 5 ++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 311565f56..29b6d4fe6 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -107,10 +107,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage - logic j1,j0; - assign j0 = (i == 0 & ~C[0][P.DIVb+1]); - assign j1 = (i == 1 & ~C[0][P.DIVb+1]) || (i == 0 & (C[0][P.DIVb-1] ^ C[0][P.DIVb])); - fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, .j0, + fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index e7df4399d..4323ee35c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -32,7 +32,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( input logic [P.DIVb:0] U,UM, // U1.DIVb input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb input logic [P.DIVb+1:0] C, // Q2.DIVb - input logic SqrtE, j1,j0, + input logic SqrtE, output logic [P.DIVb+1:0] CNext, // Q2.DIVb output logic un, output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb @@ -48,8 +48,11 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( logic [7:0] WCmsbs, WSmsbs; // U4.4 logic CarryIn; logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb + logic j0,j1; // Digit Selection logic + assign j0 = ~C[P.DIVb+1]; // first step of R digit selection: C = 00...0 + assign j1 = C[P.DIVb] ^ C[P.DIVb-1]; // second step of R digit selection: C = 1100...0 assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1 assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual From 9b87a00698e7f49c0e42eb4baf831e1cfe040582 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 4 Mar 2024 14:31:07 -0800 Subject: [PATCH 18/19] sqrt mux lint fixes --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index e81f5c872..0f0273c25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -174,7 +174,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb:0] PreSqrtX; assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even - mux2 #(P.DIVb+1) sqrtxmux({1'b0,Xnorm[P.DIVb:1]}, {1'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even + mux2 #(P.DIVb+4) sqrtxmux({4'b0,Xnorm[P.DIVb:1]}, {5'b00, Xnorm[P.DIVb:2]}, EvenExp, SqrtX); // X/2 if exponent odd, X/4 if exponent even /* // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift From 10ab07975fb21b2762ae7e9f3a648918da256a4e Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 4 Mar 2024 14:31:21 -0800 Subject: [PATCH 19/19] uslc comments --- src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index c8b065f31..7812248a9 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -31,7 +31,8 @@ module fdivsqrtuslc4cmp ( input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits - input logic SqrtE, j0, j1, + input logic SqrtE, + input logic j0,j1, // are we on first (j0) or second step (j1) of digit selection output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); logic [6:0] Wmsbs; @@ -73,11 +74,9 @@ module fdivsqrtuslc4cmp ( assign mkj1 = j1 ? 8 : 0; // when j = 1 use mk1[101] = 8 and when j = 0 use 0 so we choose u_0 = 1 assign sqrtspecial = SqrtE & (j1 | j0); - // Choose A for current operation *** Come back to this + // Choose A for current operation always_comb if (SqrtE) begin - //if (j1) A = 3'b101; - //if (Smsbs == 5'b10000) A = 3'b111; // *** can we get rid of SMSBs case? if (Smsbs[4]) A = 3'b111; // *** can we get rid of SMSBs case? else A = Smsbs[2:0]; end else A = Dmsbs;