diff --git a/.gitignore b/.gitignore index 527bdbbae..9c6691eb3 100644 --- a/.gitignore +++ b/.gitignore @@ -76,7 +76,8 @@ synthDC/*.log synthDC/*.svf synthDC/runs/ synthDC/newRuns -synthDC/PPAruns +synthDC/ppa/PPAruns +synthDC/ppa/plots synthDC/plots/ synthDC/runArchive synthDC/hdl diff --git a/.gitmodules b/.gitmodules index 132c7a9f7..81ed2d5f4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -20,9 +20,3 @@ [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark -[submodule "addins/sky130_osu_sc_t18"] - path = addins/sky130_osu_sc_t18 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t18 -[submodule "addins/sky130_osu_sc_t12"] - path = addins/sky130_osu_sc_t12 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12 diff --git a/addins/sky130_osu_sc_t12 b/addins/sky130_osu_sc_t12 deleted file mode 160000 index f1eef8447..000000000 --- a/addins/sky130_osu_sc_t12 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f1eef844734f73d3c79d83b82352118263eb7686 diff --git a/addins/sky130_osu_sc_t18 b/addins/sky130_osu_sc_t18 deleted file mode 160000 index 83f5245e1..000000000 --- a/addins/sky130_osu_sc_t18 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 83f5245e1a599c628d6c73e76c1774b8ab5cab91 diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index f25cd2cd6..2db418aa3 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -1,29 +1,63 @@ -#cmbase=../../addins/coremark PORT_DIR = $(CURDIR)/riscv64-baremetal cmbase=../../addins/coremark -work_dir=$(cmbase)/work +# cmbase= ../riscv-coremark/coremark +work_dir= ../../benchmarks/coremark/work +XLEN ?=64 sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ - $(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \ - $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ - $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h 
$(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c + $(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \ + $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ + $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c +ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) +ARCH := rv$(XLEN)im +PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ + -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ + -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ + -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ + -nostdlib -nostartfiles -ffreestanding -mstrict-align \ + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) + +# flags that cause build errors mcmodel=medlow + +# -static -mcmodel=medlow -mtune=sifive-7-series \ +# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET +# -finline-functions -falign-jumps=4 \ +# -nostdlib -nostartfiles -ffreestanding -mstrict-align \ +# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ +# -DPERFORMANCE_RUN=1 +# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " + +all: $(work_dir)/coremark.bare.riscv.elf.memfile + +run: + (cd ../../pipelined/regression && (time vsim -c -do "do wally-pipelined-batch.do rv$(XLEN)gc coremark" 2>&1 | tee $(work_dir)/coremark.sim.log)) + cd ../../benchmarks/coremark/ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv riscv64-unknown-elf-objdump -D $< > $<.elf.objdump - riscv64-unknown-elf-elf2hex --bit-width 64 --input $< 
--output $@ + riscv64-unknown-elf-elf2hex --bit-width $(XLEN) --input $< --output $@ extractFunctionRadix.sh $<.elf.objdump - (cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv64gc coremark" > $(work_dir)/coremark.sim.log)) - cd ../../benchmarks/coremark/ $(work_dir)/coremark.bare.riscv: $(sources) Makefile - # make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" # These flags were used by WD on CoreMark - make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " + make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) - .PHONY: clean clean: rm -f $(work_dir)/* + + + +# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \ +# # -O3 -falign-functions=16 -funroll-all-loops \ +# # -finline-functions -falign-jumps=4 \ +# # -nostdlib -nostartfiles -ffreestanding -mstrict-align \ +# # -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ +# # -DPERFORMANCE_RUN=1 + +# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im 
-mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " +# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" +# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " \ No newline at end of file diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c index 74ced52dc..57b7993ad 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.c +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c @@ -196,10 +196,13 @@ void stop_time(void) { CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); unsigned long instructions = minstretDiff(); - ee_printf(" Called get_time\n"); + long long cm100 = 1000000000 / elapsed; // coremark score * 100 + long long cpi100 = elapsed*100/instructions; // CPI * 100 + ee_printf(" WALLY CoreMark Results (from get_time)\n"); ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MINSTRET: %lu\n", instructions); - ee_printf(" CPI: %lu / %lu\n", elapsed, instructions); + ee_printf(" 
COREMARK/MHz Score: 10,000,000 / %lu = %d.%02d \n", elapsed, cm100/100, cm100%100); + ee_printf(" CPI: %lu / %lu = %d.%02d\n", elapsed, instructions, cpi100/100, cpi100%100); return elapsed; } /* Function: time_in_secs @@ -210,12 +213,12 @@ CORE_TICKS get_time(void) { */ secs_ret time_in_secs(CORE_TICKS ticks) { secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - float retvalint = (float) retval; - ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint); - ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retvalint); - ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retval); - ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retval); - return retvalint; + // float retvalint = (float) retval; + // ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint); + // ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retvalint); + // ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retval); + // ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retval); + return retval; } #else #error "Please implement timing functionality in core_portme.c" diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index ef26e88ad..33768b0f1 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -28,7 +28,7 @@ Original Author: Shay Gal-on Define to 1 if the platform supports floating point. 
*/ #ifndef HAS_FLOAT -#define HAS_FLOAT 1 +#define HAS_FLOAT 0 #endif /* Configuration: HAS_TIME_H Define to 1 if platform has the time.h header file, @@ -66,6 +66,9 @@ typedef size_t CORE_TICKS; #elif HAS_TIME_H #include typedef clock_t CORE_TICKS; +// #elif (XLEN==32) +// #include +// typedef ee_u32 CORE_TICKS; #else /* Configuration: size_t and clock_t Note these need to match the size of the clock output and the xLen the processor supports @@ -105,11 +108,16 @@ typedef signed int ee_s32; typedef double ee_f32; typedef unsigned char ee_u8; typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; +#if (XLEN==64) + typedef unsigned long long ee_ptr_int; +#else + typedef ee_u32 ee_ptr_int; +#endif typedef size_t ee_size_t; /* align an offset to point to a 32b value */ #define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) + /* Configuration: SEED_METHOD Defines method to get seed values that cannot be computed at compile time. diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.mak b/benchmarks/coremark/riscv64-baremetal/core_portme.mak index 4bae943dc..27e31b859 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.mak +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.mak @@ -33,13 +33,13 @@ CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc # Flag: CFLAGS # Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" #PORT_CFLAGS = -O2 -static -std=gnu99 -PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld +PORT_CFLAGS = -mcmodel=medany -fno-tree-loop-distribute-patterns -fno-common -lm -lgcc -T $(PORT_DIR)/link.ld FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. 
-DFLAGS_STR=\"$(FLAGS_STR)\" #Flag: LFLAGS_END # Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). # Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += +LFLAGS_END += -static-libgcc -lgcc # Flag: PORT_SRCS # Port specific source files can be added here PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S diff --git a/benchmarks/riscv-coremark/.gitignore b/benchmarks/riscv-coremark/.gitignore deleted file mode 100644 index 0f2251abe..000000000 --- a/benchmarks/riscv-coremark/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -coremark.riscv -coremark.bare.riscv \ No newline at end of file diff --git a/benchmarks/riscv-coremark/.gitmodules b/benchmarks/riscv-coremark/.gitmodules deleted file mode 100644 index 938028cb7..000000000 --- a/benchmarks/riscv-coremark/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "coremark"] - path = coremark - url = https://github.com/eembc/coremark diff --git a/benchmarks/riscv-coremark/LICENSE b/benchmarks/riscv-coremark/LICENSE deleted file mode 100644 index 860ca9cc6..000000000 --- a/benchmarks/riscv-coremark/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2017, Christopher Celio -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/benchmarks/riscv-coremark/Makefile b/benchmarks/riscv-coremark/Makefile deleted file mode 100644 index ce8dea81e..000000000 --- a/benchmarks/riscv-coremark/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -#cmbase=../../addins/coremark -PORT_DIR = $(CURDIR)/riscv64-baremetal -cmbase=coremark -sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ - $(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \ - $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ - $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c - -work/coremark.bare.riscv.elf.memfile: work/coremark.bare.riscv - riscv64-unknown-elf-objdump -D $< > $<.elf.objdump - riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@ - extractFunctionRadix.sh $<.elf.objdump - -work/coremark.bare.riscv: $(sources) Makefile -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d 
-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" - # These flags were used by WD on CoreMark - make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " -# -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error - mkdir -p work/ - mv $(cmbase)/coremark.bare.riscv work/ - -.PHONY: clean - -clean: - rm -f work/* diff --git a/benchmarks/riscv-coremark/coremark/LICENSE.md b/benchmarks/riscv-coremark/coremark/LICENSE.md deleted file mode 100644 index 14e53e9ee..000000000 --- a/benchmarks/riscv-coremark/coremark/LICENSE.md +++ /dev/null @@ -1,100 +0,0 @@ -# COREMARK® ACCEPTABLE USE AGREEMENT - -This ACCEPTABLE USE AGREEMENT (this “Agreement”) is offered by Embedded Microprocessor Benchmark Consortium, a California nonprofit corporation (“Licensor”), to users of its CoreMark® software (“Licensee”) exclusively on the following terms. - -Licensor offers benchmarking software (“Software”) pursuant to an open source license, but carefully controls use of its benchmarks and their associated goodwill. Licensor has registered its trademark in one of the benchmarks available through the Software, COREMARK, Ser. No. 85/487,290; Reg. No. 4,179,307 (the “Trademark”), and promotes the use of a standard metric as a benchmark for assessing the performance of embedded systems. 
Solely on the terms described herein, Licensee may use and display the Trademark in connection with the generation of data regarding measurement and analysis of computer and embedded system benchmarking via the Software (the “Licensed Use”). - -## Article 1 – License Grant. -1.1. License. Subject to the terms and conditions of this Agreement, Licensor hereby grants to Licensee, and Licensee hereby accepts from Licensor, a personal, non-exclusive, royalty-free, revocable right and license to use and display the Trademark during the term of this Agreement (the “Term”), solely and exclusively in connection with the Licensed Use. During the Term, Licensee (i) shall not modify or otherwise create derivative works of the Trademark, and (ii) may use the Trademark only to the extent permitted under this License. Neither Licensee nor any affiliate or agent thereof shall otherwise use the Trademark without the prior express written consent of Licensor, which may be withheld in its sole and absolute discretion. All rights not expressly granted to Licensee hereunder shall remain the exclusive property of Licensor. - -1.2. Modifications to the Software. Licensee shall not use the Trademark in connection with any use of a modified, derivative, or otherwise altered copy of the Software. - -1.3. Licensor’s Use. Nothing in this Agreement shall preclude Licensor or any of its successors or assigns from using or permitting other entities to use the Trademark, whether or not such entity directly or indirectly competes or conflicts with Licensee’s Licensed Use in any manner. - -1.4. Term and Termination. This Agreement is perpetual unless terminated by either of the parties. Licensee may terminate this Agreement for convenience, without cause or liability, for any reason or for no reason whatsoever, upon ten (10) business days written notice. Licensor may terminate this Agreement effective immediately upon notice of breach. 
Upon termination, Licensee shall immediately remove all implementations of the Trademark from the Licensed Use, and delete all digitals files and records of all materials related to the Trademark. - -## Article 2 – Ownership. -2.1. Ownership. Licensee acknowledges and agrees that Licensor is the owner of all right, title, and interest in and to the Trademark, and all such right, title, and interest shall remain with Licensor. Licensee shall not contest, dispute, challenge, oppose, or seek to cancel Licensor’s right, title, and interest in and to the Trademark. Licensee shall not prosecute any application for registration of the Trademark. Licensee shall display appropriate notices regarding ownership of the Trademark in connection with the Licensed Use. - -2.2. Goodwill. Licensee acknowledges that Licensee shall not acquire any right, title, or interest in the Trademark by virtue of this Agreement other than the license granted hereunder, and disclaims any such right, title, interest, or ownership. All goodwill and reputation generated by Licensee’s use of the Trademark shall inure to the exclusive benefit of Licensor. Licensee shall not by any act or omission use the Trademark in any manner that disparages or reflects adversely on Licensor or its Licensed Use or reputation. Licensee shall not take any action that would interfere with or prejudice Licensor’s ownership or registration of the Trademark, the validity of the Trademark or the validity of the license granted by this Agreement. If Licensor determines and notifies Licensee that any act taken in connection with the Licensed Use (i) is inaccurate, unlawful or offensive to good taste; (ii) fails to provide for proper trademark notices, or (iii) otherwise violates Licensee’s obligations under this Agreement, the license granted under this Agreement shall terminate. - -## Article 3 – Indemnification. -3.1. Indemnification Generally. 
Licensee agrees to indemnify, defend, and hold harmless (collectively “indemnify” or “indemnification”) Licensor, including Licensor’s members, managers, officers, and employees (collectively “Related Persons”), from and against, and pay or reimburse Licensor and such Related Persons for, any and all third-party actions, claims, demands, proceedings, investigations, inquiries (collectively, “Claims”), and any and all liabilities, obligations, fines, deficiencies, costs, expenses, royalties, losses, and damages (including reasonable outside counsel fees and expenses) associated with such Claims, to the extent that such Claim arises out of (i) Licensee’s material breach of this Agreement, or (ii) any allegation(s) that Licensee’s actions infringe or violate any third-party intellectual property right, including without limitation, any U.S. copyright, patent, or trademark, or are otherwise found to be tortious or criminal (whether or not such indemnified person is a named party in a legal proceeding). - -3.2. Notice and Defense of Claims. Licensor shall promptly notify Licensee of any Claim for which indemnification is sought, following actual knowledge of such Claim, provided however that the failure to give such notice shall not relieve Licensee of its obligations hereunder except to the extent that Licensee is materially prejudiced by such failure. 
In the event that any third-party Claim is brought, Licensee shall have the right and option to undertake and control the defense of such action with counsel of its choice, provided however that (i) Licensor at its own expense may participate and appear on an equal footing with Licensee in the defense of any such Claim, (ii) Licensor may undertake and control such defense in the event of the material failure of Licensee to undertake and control the same; and (iii) the defense of any Claim relating to the intellectual property rights of Licensor or its licensors and any related counterclaims shall be solely controlled by Licensor with counsel of its choice. Licensee shall not consent to judgment or concede or settle or compromise any Claim without the prior written approval of Licensor (whose approval shall not be unreasonably withheld), unless such concession or settlement or compromise includes a full and unconditional release of Licensor and any applicable Related Persons from all liabilities in respect of such Claim. - -## Article 4 – Miscellaneous. -4.1. Relationship of the Parties. This Agreement does not create a partnership, franchise, joint venture, agency, fiduciary, or employment relationship between the parties. - -4.2. No Third-Party Beneficiaries. Except for the rights of Related Persons under Article 3 (Indemnification), there are no third-party beneficiaries to this Agreement. - -4.3. Assignment. Licensee’s rights hereunder are non-assignable, and may not be sublicensed. - -4.4. Equitable Relief. Licensee acknowledges that the remedies available at law for any breach of this Agreement will, by their nature, be inadequate. Accordingly, Licensor may obtain injunctive relief or other equitable relief to restrain a breach or threatened breach of this Agreement or to specifically enforce this Agreement, without proving that any monetary damages have been sustained, and without the requirement of posting of a bond prior to obtaining such equitable relief. 
- -4.5. Governing Law. This Agreement will be interpreted, construed, and enforced in all respects in accordance with the laws of the State of California, without reference to its conflict of law principles. - -4.6. Attorneys’ Fees. If any legal action, arbitration or other proceeding is brought for the enforcement of this Agreement, or because of an alleged dispute, breach, default, or misrepresentation in connection with any of the provisions of this Agreement, the successful or prevailing party shall be entitled to recover its reasonable attorneys’ fees and other reasonable costs incurred in that action or proceeding, in addition to any other relief to which it may be entitled. - -4.7. Amendment; Waiver. This Agreement may not be amended, nor may any rights under it be waived, except in writing by Licensor. - -4.8. Severability. If any provision of this Agreement is held by a court of competent jurisdiction to be contrary to law, the provision shall be modified by the court and interpreted so as best to accomplish the objectives of the original provision to the fullest extent -permitted by law, and the remaining provisions of this Agreement shall remain in effect. - -4.9. Entire Agreement. This Agreement constitutes the entire agreement between the parties and supersedes all prior and contemporaneous agreements, proposals or representations, written or oral, concerning its subject matter. - - -# Apache License - -Version 2.0, January 2004 - -http://www.apache.org/licenses/ - -## TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
- -"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. - -"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
- -"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: - - You must give any other recipients of the Work or Derivative Works a copy of this License; and - You must cause any modified files to carry prominent notices stating that You changed the files; and - You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and - If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. - - You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS diff --git a/benchmarks/riscv-coremark/coremark/Makefile b/benchmarks/riscv-coremark/coremark/Makefile deleted file mode 100644 index 51760d1dd..000000000 --- a/benchmarks/riscv-coremark/coremark/Makefile +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -# Make sure the default target is to simply build and run the benchmark. -RSTAMP = v1.0 - -.PHONY: run score -run: $(OUTFILE) rerun score - -score: - @echo "Check run1.log and run2.log for results." - @echo "See README.md for run and reporting rules." - -ifndef PORT_DIR -# Ports for a couple of common self hosted platforms -UNAME=$(shell if command -v uname 2> /dev/null; then uname ; fi) -ifneq (,$(findstring CYGWIN,$(UNAME))) -PORT_DIR=cygwin -endif -ifneq (,$(findstring Linux,$(UNAME))) -MACHINE=$(shell uname -m) -ifneq (,$(findstring 64,$(MACHINE))) -PORT_DIR=linux64 -else -PORT_DIR=linux -endif -endif -endif -ifndef PORT_DIR -$(error PLEASE define PORT_DIR! (e.g. 
make PORT_DIR=simple)) -endif -vpath %.c $(PORT_DIR) -vpath %.h $(PORT_DIR) -vpath %.mak $(PORT_DIR) -include $(PORT_DIR)/core_portme.mak - -ifndef $(ITERATIONS) -ITERATIONS=0 -endif -ifdef REBUILD -FORCE_REBUILD=force_rebuild -endif - -CFLAGS += -DITERATIONS=$(ITERATIONS) - -CORE_FILES = core_list_join core_main core_matrix core_state core_util -ORIG_SRCS = $(addsuffix .c,$(CORE_FILES)) -SRCS = $(ORIG_SRCS) $(PORT_SRCS) -OBJS = $(addprefix $(OPATH),$(addsuffix $(OEXT),$(CORE_FILES)) $(PORT_OBJS)) -OUTNAME = coremark$(EXE) -OUTFILE = $(OPATH)$(OUTNAME) -LOUTCMD = $(OFLAG) $(OUTFILE) $(LFLAGS_END) -OUTCMD = $(OUTFLAG) $(OUTFILE) $(LFLAGS_END) - -HEADERS = coremark.h -CHECK_FILES = $(ORIG_SRCS) $(HEADERS) - -$(OPATH): - $(MKDIR) $(OPATH) - -.PHONY: compile link -ifdef SEPARATE_COMPILE -$(OPATH)$(PORT_DIR): - $(MKDIR) $(OPATH)$(PORT_DIR) - -compile: $(OPATH) $(OPATH)$(PORT_DIR) $(OBJS) $(HEADERS) -link: compile - $(LD) $(LFLAGS) $(XLFLAGS) $(OBJS) $(LOUTCMD) - -else - -compile: $(OPATH) $(SRCS) $(HEADERS) - $(CC) $(CFLAGS) $(XCFLAGS) $(SRCS) $(OUTCMD) -link: compile - @echo "Link performed along with compile" - -endif - -$(OUTFILE): $(SRCS) $(HEADERS) Makefile core_portme.mak $(FORCE_REBUILD) - $(MAKE) port_prebuild - $(MAKE) link - $(MAKE) port_postbuild - -.PHONY: rerun -rerun: - $(MAKE) XCFLAGS="$(XCFLAGS) -DPERFORMANCE_RUN=1" load run1.log - $(MAKE) XCFLAGS="$(XCFLAGS) -DVALIDATION_RUN=1" load run2.log - -PARAM1=$(PORT_PARAMS) 0x0 0x0 0x66 $(ITERATIONS) -PARAM2=$(PORT_PARAMS) 0x3415 0x3415 0x66 $(ITERATIONS) -PARAM3=$(PORT_PARAMS) 8 8 8 $(ITERATIONS) - -run1.log-PARAM=$(PARAM1) 7 1 2000 -run2.log-PARAM=$(PARAM2) 7 1 2000 -run3.log-PARAM=$(PARAM3) 7 1 1200 - -run1.log run2.log run3.log: load - $(MAKE) port_prerun - $(RUN) $(OUTFILE) $($(@)-PARAM) > $(OPATH)$@ - $(MAKE) port_postrun - -.PHONY: gen_pgo_data -gen_pgo_data: run3.log - -.PHONY: load -load: $(OUTFILE) - $(MAKE) port_preload - $(LOAD) $(OUTFILE) - $(MAKE) port_postload - -.PHONY: clean -clean: - rm -f 
$(OUTFILE) $(OPATH)*.log *.info $(OPATH)index.html $(PORT_CLEAN) - -.PHONY: force_rebuild -force_rebuild: - echo "Forcing Rebuild" - -.PHONY: check -check: - md5sum -c coremark.md5 - -ifdef ETC -# Targets related to testing and releasing CoreMark. Not part of the general release! -include Makefile.internal -endif diff --git a/benchmarks/riscv-coremark/coremark/README.md b/benchmarks/riscv-coremark/coremark/README.md deleted file mode 100644 index 16b54b7b3..000000000 --- a/benchmarks/riscv-coremark/coremark/README.md +++ /dev/null @@ -1,398 +0,0 @@ - -# Introduction - -CoreMark's primary goals are simplicity and providing a method for testing only a processor's core features. For more information about EEMBC's comprehensive embedded benchmark suites, please see www.eembc.org. - -For a more compute-intensive version of CoreMark that uses larger datasets and execution loops taken from common applications, please check out EEMBC's [CoreMark-PRO](https://www.github.com/eembc/coremark-pro) benchmark, also on GitHub. - -# Building and Running - -To build and run the benchmark, type - -`> make` - -Full results are available in the files `run1.log` and `run2.log`. CoreMark result can be found in `run1.log`. - -## Cross Compiling - -For cross compile platforms please adjust `core_portme.mak`, `core_portme.h` (and possibly `core_portme.c`) according to the specific platform used. When porting to a new platform, it is recommended to copy one of the default port folders (e.g. `mkdir && cp linux/* `), adjust the porting files, and run: -~~~ -% make PORT_DIR= -~~~ - -## Make Targets -`run` - Default target, creates `run1.log` and `run2.log`. 
-`run1.log` - Run the benchmark with performance parameters, and output to `run1.log` -`run2.log` - Run the benchmark with validation parameters, and output to `run2.log` -`run3.log` - Run the benchmark with profile generation parameters, and output to `run3.log` -`compile` - compile the benchmark executable -`link` - link the benchmark executable -`check` - test MD5 of sources that may not be modified -`clean` - clean temporary files - -### Make flag: `ITERATIONS` -By default, the benchmark will run between 10-100 seconds. To override, use `ITERATIONS=N` -~~~ -% make ITERATIONS=10 -~~~ -Will run the benchmark for 10 iterations. It is recommended to set a specific number of iterations in certain situations e.g.: - -* Running with a simulator -* Measuring power/energy -* Timing cannot be restarted - -Minimum required run time: **Results are only valid for reporting if the benchmark ran for at least 10 secs!** - -### Make flag: `XCFLAGS` -To add compiler flags from the command line, use `XCFLAGS` e.g.: - -~~~ -% make XCFLAGS="-g -DMULTITHREAD=4 -DUSE_FORK=1" -~~~ - -### Make flag: `CORE_DEBUG` - -Define to compile for a debug run if you get incorrect CRC. - -~~~ -% make XCFLAGS="-DCORE_DEBUG=1" -~~~ - -### Make flag: `REBUILD` - -Force a rebuild of the executable. - -## Systems Without `make` -The following files need to be compiled: -* `core_list_join.c` -* `core_main.c` -* `core_matrix.c` -* `core_state.c` -* `core_util.c` -* `PORT_DIR/core_portme.c` - -For example: -~~~ -% gcc -O2 -o coremark.exe core_list_join.c core_main.c core_matrix.c core_state.c core_util.c simple/core_portme.c -DPERFORMANCE_RUN=1 -DITERATIONS=1000 -% ./coremark.exe > run1.log -~~~ -The above will compile the benchmark for a performance run and 1000 iterations. Output is redirected to `run1.log`. - -# Parallel Execution -Use `XCFLAGS=-DMULTITHREAD=N` where N is number of threads to run in parallel. 
Several implementations are available to execute in multiple contexts, or you can implement your own in `core_portme.c`. - -~~~ -% make XCFLAGS="-DMULTITHREAD=4 -DUSE_PTHREAD" -~~~ - -The above will compile the benchmark for execution on 4 cores, using POSIX Threads API. - -Note: linking may fail on the previous command if your linker does not automatically add the `pthread` library. If you encounter `undefined reference` errors, please modify the `core_portme.mak` file for your platform, (e.g. `linux/core_portme.mak`) and add `-lpthread` to the `LFLAGS_END` parameter. - -# Run Parameters for the Benchmark Executable -CoreMark's executable takes several parameters as follows (but only if `main()` accepts arguments): -1st - A seed value used for initialization of data. -2nd - A seed value used for initialization of data. -3rd - A seed value used for initialization of data. -4th - Number of iterations (0 for auto : default value) -5th - Reserved for internal use. -6th - Reserved for internal use. -7th - For malloc users only, ovreride the size of the input data buffer. - -The run target from make will run coremark with 2 different data initialization seeds. - -## Alternative parameters: -If not using `malloc` or command line arguments are not supported, the buffer size -for the algorithms must be defined via the compiler define `TOTAL_DATA_SIZE`. -`TOTAL_DATA_SIZE` must be set to 2000 bytes (default) for standard runs. -The default for such a target when testing different configurations could be: - -~~~ -% make XCFLAGS="-DTOTAL_DATA_SIZE=6000 -DMAIN_HAS_NOARGC=1" -~~~ - -# Submitting Results - -CoreMark results can be submitted on the web. Open a web browser and go to the [submission page](https://www.eembc.org/coremark/submit.php). After registering an account you may enter a score. - -# Run Rules -What is and is not allowed. - -## Required -1. The benchmark needs to run for at least 10 seconds. -2. 
All validation must succeed for seeds `0,0,0x66` and `0x3415,0x3415,0x66`, buffer size of 2000 bytes total. - * If not using command line arguments to main: -~~~ - % make XCFLAGS="-DPERFORMANCE_RUN=1" REBUILD=1 run1.log - % make XCFLAGS="-DVALIDATION_RUN=1" REBUILD=1 run2.log -~~~ -3. If using profile guided optimization, profile must be generated using seeds of `8,8,8`, and buffer size of 1200 bytes total. -~~~ - % make XCFLAGS="-DTOTAL_DATA_SIZE=1200 -DPROFILE_RUN=1" REBUILD=1 run3.log -~~~ -4. All source files must be compiled with the same flags. -5. All data type sizes must match size in bits such that: - * `ee_u8` is an unsigned 8-bit datatype. - * `ee_s16` is a signed 16-bit datatype. - * `ee_u16` is an unsigned 16-bit datatype. - * `ee_s32` is a signed 32-bit datatype. - * `ee_u32` is an unsigned 32-bit datatype. - -## Allowed - -1. Changing number of iterations -2. Changing toolchain and build/load/run options -3. Changing method of acquiring a data memory block -5. Changing the method of acquiring seed values -6. Changing implementation `in core_portme.c` -7. Changing configuration values in `core_portme.h` -8. Changing `core_portme.mak` - -## NOT ALLOWED -1. Changing of source file other then `core_portme*` (use `make check` to validate) - -# Reporting rules -Use the following syntax to report results on a data sheet: - -CoreMark 1.0 : N / C [/ P] [/ M] - -N - Number of iterations per second with seeds 0,0,0x66,size=2000) - -C - Compiler version and flags - -P - Parameters such as data and code allocation specifics - -* This parameter *may* be omitted if all data was allocated on the heap in RAM. -* This parameter *may not* be omitted when reporting CoreMark/MHz - -M - Type of parallel execution (if used) and number of contexts -* This parameter may be omitted if parallel execution was not used. 
- -e.g.: - -~~~ -CoreMark 1.0 : 128 / GCC 4.1.2 -O2 -fprofile-use / Heap in TCRAM / FORK:2 -~~~ -or -~~~ -CoreMark 1.0 : 1400 / GCC 3.4 -O4 -~~~ - -If reporting scaling results, the results must be reported as follows: - -CoreMark/MHz 1.0 : N / C / P [/ M] - -P - When reporting scaling results, memory parameter must also indicate memory frequency:core frequency ratio. -1. If the core has cache and cache frequency to core frequency ratio is configurable, that must also be included. - -e.g.: - -~~~ -CoreMark/MHz 1.0 : 1.47 / GCC 4.1.2 -O2 / DDR3(Heap) 30:1 Memory 1:1 Cache -~~~ - -# Log File Format -The log files have the following format - -~~~ -2K performance run parameters for coremark. (Run type) -CoreMark Size : 666 (Buffer size) -Total ticks : 25875 (platform dependent value) -Total time (secs) : 25.875000 (actual time in seconds) -Iterations/Sec : 3864.734300 (Performance value to report) -Iterations : 100000 (number of iterations used) -Compiler version : GCC3.4.4 (Compiler and version) -Compiler flags : -O2 (Compiler and linker flags) -Memory location : Code in flash, data in on chip RAM -seedcrc : 0xe9f5 (identifier for the input seeds) -[0]crclist : 0xe714 (validation for list part) -[0]crcmatrix : 0x1fd7 (validation for matrix part) -[0]crcstate : 0x8e3a (validation for state part) -[0]crcfinal : 0x33ff (iteration dependent output) -Correct operation validated. See README.md for run and reporting rules. (*Only when run is successful*) -CoreMark 1.0 : 6508.490622 / GCC3.4.4 -O2 / Heap (*Only on a successful performance run*) -~~~ - -# Theory of Operation - -This section describes the initial goals of CoreMark and their implementation. - -## Small and easy to understand - -* X number of source code lines for timed portion of the benchmark. -* Meaningful names for variables and functions. -* Comments for each block of code more than 10 lines long. - -## Portability - -A thin abstraction layer will be provided for I/O and timing in a separate file. 
All I/O and timing of the benchmark will be done through this layer. - -### Code / data size - -* Compile with gcc on x86 and make sure all sizes are according to requirements. -* If dynamic memory allocation is used, take total memory allocated into account as well. -* Avoid recursive functions and keep track of stack usage. -* Use the same memory block as data site for all algorithms, and initialize the data before each algorithm – while this means that initialization with data happens during the timed portion, it will only happen once during the timed portion and so have negligible effect on the results. - -## Controlled output - -This may be the most difficult goal. Compilers are constantly improving and getting better at analyzing code. To create work that cannot be computed at compile time and must be computed at run time, we will rely on two assumptions: - -* Some system functions (e.g. time, scanf) and parameters cannot be computed at compile time. In most cases, marking a variable volatile means the compiler is force to read this variable every time it is read. This will be used to introduce a factor into the input that cannot be precomputed at compile time. Since the results are input dependent, that will make sure that computation has to happen at run time. - -* Either a system function or I/O (e.g. scanf) or command line parameters or volatile variables will be used before the timed portion to generate data which is not available at compile time. Specific method used is not relevant as long as it can be controlled, and that it cannot be computed or eliminated by the compiler at compile time. E.g. if the clock() functions is a compiler stub, it may not be used. The derived values will be reported on the output so that verification can be done on a different machine. - -* We cannot rely on command line parameters since some embedded systems do not have the capability to provide command line parameters. 
All 3 methods above will be implemented (time based, scanf and command line parameters) and all 3 are valid if the compiler cannot determine the value at compile time. - -* It is important to note that The actual values that are to be supplied at run time will be standardized. The methodology is not intended to provide random data, but simply to provide controlled data that cannot be precomputed at compile time. - -* Printed results must be valid at run time. This will be used to make sure the computation has been executed. - -* Some embedded systems do not provide “printf” or other I/O functionality. All I/O will be done through a thin abstraction interface to allow execution on such systems (e.g. allow output via JTAG). - -## Key Algorithms - -### Linked List - -The following linked list structure will be used: - -~~~ -typedef struct list_data_s { - ee_s16 data16; - ee_s16 idx; -} list_data; - -typedef struct list_head_s { - struct list_head_s *next; - struct list_data_s *info; -} list_head; -~~~ - -While adding a level of indirection accessing the data, this structure is realistic and used in many embedded applications for small to medium lists. - -The list itself will be initialized on a block of memory that will be passed in to the initialization function. While in general linked lists use malloc for new nodes, embedded applications sometime control the memory for small data structures such as arrays and lists directly to avoid the overhead of system calls, so this approach is realistic. - -The linked list will be initialized such that 1/4 of the list pointers point to sequential areas in memory, and 3/4 of the list pointers are distributed in a non sequential manner. This is done to emulate a linked list that had add/remove happen for a while disrupting the neat order, and then a series of adds that are likely to come from sequential memory locations. - -For the benchmark itself: -- Multiple find operations are going to be performed. 
These find operations may result in the whole list being traversed. The result of each find will become part of the output chain. -- The list will be sorted using merge sort based on the data16 value, and then derive CRC of the data16 item in order for part of the list. The CRC will become part of the output chain. -- The list will be sorted again using merge sort based on the idx value. This sort will guarantee that the list is returned to the primary state before leaving the function, so that multiple iterations of the function will have the same result. CRC of the data16 for part of the list will again be calculated and become part of the output chain. - -The actual `data16` in each cell will be pseudo random based on a single 16b input that cannot be determined at compile time. In addition, the part of the list which is used for CRC will also be passed to the function, and determined based on an input that cannot be determined at run time. - -### Matrix Multiply - -This very simple algorithm forms the basis of many more complex algorithms. The tight inner loop is the focus of many optimizations (compiler as well as hardware based) and is thus relevant for embedded processing. - -The total available data space will be divided to 3 parts: -1. NxN matrix A. -2. NxN matrix B. -3. NxN matrix C. - -E.g. for 2K we will have 3 12x12 matrices (assuming data type of 32b 12(len)*12(wid)*4(size)*3(num) =1728 bytes). - -Matrix A will be initialized with small values (upper 3/4 of the bits all zero). -Matrix B will be initialized with medium values (upper half of the bits all zero). -Matrix C will be used for the result. - -For the benchmark itself: -- Multiple A by a constant into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. -- Multiple A by column X of B into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. 
-- Multiple A by B into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. - -The actual values for A and B must be derived based on input that is not available at compile time. - -### State Machine - -This part of the code needs to exercise switch and if statements. As such, we will use a small Moore state machine. In particular, this will be a state machine that identifies string input as numbers and divides them according to format. - -The state machine will parse the input string until either a “,” separator or end of input is encountered. An invalid number will cause the state machine to return invalid state and a valid number will cause the state machine to return with type of number format (int/float/scientific). - -This code will perform a realistic task, be small enough to easily understand, and exercise the required functionality. The other option used in embedded systems is a mealy based state machine, which is driven by a table. The table then determines the number of states and complexity of transitions. This approach, however, tests mainly the load/store and function call mechanisms and less the handling of branches. If analysis of the final results shows that the load/store functionality of the processor is not exercised thoroughly, it may be a good addition to the benchmark (codesize allowing). - -For input, the memory block will be initialized with comma separated values of mixed formats, as well as invalid inputs. - -For the benchmark itself: -- Invoke the state machine on all of the input and count final states and state transitions. CRC of all final states and transitions will become part of the output chain. -- Modify the input at intervals (inject errors) and repeat the state machine operation. -- Modify the input back to original form. - -The actual input must be initialized based on data that cannot be determined at compile time. 
In addition the intervals for modification of the input and the actual modification must be based on input that cannot be determined at compile time. - -# Validation - -This release was tested on the following platforms: -* x86 cygwin and gcc 3.4 (Quad, dual and single core systems) -* x86 linux (Ubuntu/Fedora) and gcc (4.2/4.1) (Quad and single core systems) -* MIPS64 BE linux and gcc 3.4 16 cores system -* MIPS32 BE linux with CodeSourcery compiler 4.2-177 on Malta/Linux with a 1004K 3-core system -* PPC simulator with gcc 4.2.2 (No OS) -* PPC 64b BE linux (yellowdog) with gcc 3.4 and 4.1 (Dual core system) -* BF533 with VDSP50 -* Renesas R8C/H8 MCU with HEW 4.05 -* NXP LPC1700 armcc v4.0.0.524 -* NEC 78K with IAR v4.61 -* ARM simulator with armcc v4 - -# Memory Analysis - -Valgrind 3.4.0 used and no errors reported. - -# Balance Analysis - -Number of instructions executed for each function tested with cachegrind and found balanced with gcc and -O0. - -# Statistics - -Lines: -~~~ -Lines Blank Cmnts Source AESL -===== ===== ===== ===== ========== ======================================= - 469 66 170 251 627.5 core_list_join.c (C) - 330 18 54 268 670.0 core_main.c (C) - 256 32 80 146 365.0 core_matrix.c (C) - 240 16 51 186 465.0 core_state.c (C) - 165 11 20 134 335.0 core_util.c (C) - 150 23 36 98 245.0 coremark.h (C) - 1610 166 411 1083 2707.5 ----- Benchmark ----- (6 files) - 293 15 74 212 530.0 linux/core_portme.c (C) - 235 30 104 104 260.0 linux/core_portme.h (C) - 528 45 178 316 790.0 ----- Porting ----- (2 files) - -* For comparison, here are the stats for Dhrystone -Lines Blank Cmnts Source AESL -===== ===== ===== ===== ========== ======================================= - 311 15 242 54 135.0 dhry.h (C) - 789 132 119 553 1382.5 dhry_1.c (C) - 186 26 68 107 267.5 dhry_2.c (C) - 1286 173 429 714 1785.0 ----- C ----- (3 files) -~~~ - -# Credits -Many thanks to all of the individuals who helped with the development or testing of CoreMark including (Sorted by 
company name; note that company names may no longer be accurate as this was written in 2009). -* Alan Anderson, ADI -* Adhikary Rajiv, ADI -* Elena Stohr, ARM -* Ian Rickards, ARM -* Andrew Pickard, ARM -* Trent Parker, CAVIUM -* Shay Gal-On, EEMBC -* Markus Levy, EEMBC -* Peter Torelli, EEMBC -* Ron Olson, IBM -* Eyal Barzilay, MIPS -* Jens Eltze, NEC -* Hirohiko Ono, NEC -* Ulrich Drees, NEC -* Frank Roscheda, NEC -* Rob Cosaro, NXP -* Shumpei Kawasaki, RENESAS - -# Legal -Please refer to LICENSE.md in this reposity for a description of your rights to use this code. - -# Copyright -Copyright © 2009 EEMBC All rights reserved. -CoreMark is a trademark of EEMBC and EEMBC is a registered trademark of the Embedded Microprocessor Benchmark Consortium. - diff --git a/benchmarks/riscv-coremark/coremark/barebones/core_portme.c b/benchmarks/riscv-coremark/coremark/barebones/core_portme.c deleted file mode 100644 index 18967676b..000000000 --- a/benchmarks/riscv-coremark/coremark/barebones/core_portme.c +++ /dev/null @@ -1,153 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Original Author: Shay Gal-on -*/ -#include "coremark.h" -#include "core_portme.h" - -#if VALIDATION_RUN -volatile ee_s32 seed1_volatile = 0x3415; -volatile ee_s32 seed2_volatile = 0x3415; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PERFORMANCE_RUN -volatile ee_s32 seed1_volatile = 0x0; -volatile ee_s32 seed2_volatile = 0x0; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PROFILE_RUN -volatile ee_s32 seed1_volatile = 0x8; -volatile ee_s32 seed2_volatile = 0x8; -volatile ee_s32 seed3_volatile = 0x8; -#endif -volatile ee_s32 seed4_volatile = ITERATIONS; -volatile ee_s32 seed5_volatile = 0; -/* Porting : Timing functions - How to capture time and convert to seconds must be ported to whatever is - supported by the platform. e.g. Read value from on board RTC, read value from - cpu clock cycles performance counter etc. Sample implementation for standard - time.h and windows.h definitions included. -*/ -CORETIMETYPE -barebones_clock() -{ -#error \ - "You must implement a method to measure time in barebones_clock()! This function should return current time.\n" -} -/* Define : TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be - measured. - - Use lower values to increase resolution, but make sure that overflow - does not occur. If there are issues with the return value overflowing, - increase this value. - */ -#define GETMYTIME(_t) (*_t = barebones_clock()) -#define MYTIMEDIFF(fin, ini) ((fin) - (ini)) -#define TIMER_RES_DIVIDER 1 -#define SAMPLE_TIME_IMPLEMENTATION 1 -#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER) - -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function : start_time - This function will be called right before starting the timed portion of - the benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or zeroing some system parameters - e.g. 
setting the cpu clocks - cycles to 0. -*/ -void -start_time(void) -{ - GETMYTIME(&start_time_val); -} -/* Function : stop_time - This function will be called right after ending the timed portion of the - benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or other system parameters - e.g. reading the current value of - cpu cycles counter. -*/ -void -stop_time(void) -{ - GETMYTIME(&stop_time_val); -} -/* Function : get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other - value, as long as it can be converted to seconds by . This - methodology is taken to accomodate any hardware or simulated platform. The - sample implementation returns millisecs by default, and the resolution is - controlled by -*/ -CORE_TICKS -get_time(void) -{ - CORE_TICKS elapsed - = (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function : time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for - floating point. Default implementation implemented by the EE_TICKS_PER_SEC - macro above. -*/ -secs_ret -time_in_secs(CORE_TICKS ticks) -{ - secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} - -ee_u32 default_num_contexts = 1; - -/* Function : portable_init - Target specific initialization code - Test for some common mistakes. -*/ -void -portable_init(core_portable *p, int *argc, char *argv[]) -{ -#error \ - "Call board initialization routines in portable init (if needed), in particular initialize UART!\n" - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) - { - ee_printf( - "ERROR! Please define ee_ptr_int to a type that holds a " - "pointer!\n"); - } - if (sizeof(ee_u32) != 4) - { - ee_printf("ERROR! 
Please define ee_u32 to a 32b unsigned type!\n"); - } - p->portable_id = 1; -} -/* Function : portable_fini - Target specific final code -*/ -void -portable_fini(core_portable *p) -{ - p->portable_id = 0; -} diff --git a/benchmarks/riscv-coremark/coremark/barebones/core_portme.h b/benchmarks/riscv-coremark/coremark/barebones/core_portme.h deleted file mode 100644 index 55f643bf3..000000000 --- a/benchmarks/riscv-coremark/coremark/barebones/core_portme.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ -/* Topic : Description - This file contains configuration constants required to execute on - different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration : HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration : HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration : USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 1 -#endif -/* Configuration : HAS_STDIO - Define to 1 if the platform has stdio.h. 
-*/ -#ifndef HAS_STDIO -#define HAS_STDIO 0 -#endif -/* Configuration : HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf - function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 0 -#endif - -/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION -#ifdef __GNUC__ -#define COMPILER_VERSION "GCC"__VERSION__ -#else -#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" -#endif -#endif -#ifndef COMPILER_FLAGS -#define COMPILER_FLAGS \ - FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION -#define MEM_LOCATION "STACK" -#endif - -/* Data Types : - To avoid compiler issues, define the data types that need ot be used for - 8b, 16b and 32b in . - - *Imprtant* : - ee_ptr_int needs to be the data type used to hold pointers, otherwise - coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -#define NULL ((void *)0) -/* align_mem : - This macro is used to align an offset to point to a 32b value. It is - used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) - -/* Configuration : CORE_TICKS - Define type of return from the timing functions. - */ -#define CORETIMETYPE ee_u32 -typedef ee_u32 CORE_TICKS; - -/* Configuration : SEED_METHOD - Defines method to get seed values that cannot be computed at compile - time. - - Valid values : - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_VOLATILE -#endif - -/* Configuration : MEM_METHOD - Defines method to get a block of memry. 
- - Valid values : - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_STACK -#endif - -/* Configuration : MULTITHREAD - Define for parallel execution - - Valid values : - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note : - If this flag is defined to more then 1, an implementation for launching - parallel contexts must be defined. - - Two sample implementations are provided. Use or - to enable them. - - It is valid to have a different implementation of - and in , to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#define USE_PTHREAD 0 -#define USE_FORK 0 -#define USE_SOCKET 0 -#endif - -/* Configuration : MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values : - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported - - Note : - This flag only matters if MULTITHREAD has been defined to a value - greater then 1. -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration : MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values : - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable : default_num_contexts - Not used for this simple port, must cintain the value 1. 
-*/ -extern ee_u32 default_num_contexts; - -typedef struct CORE_PORTABLE_S -{ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \ - && !defined(VALIDATION_RUN) -#if (TOTAL_DATA_SIZE == 1200) -#define PROFILE_RUN 1 -#elif (TOTAL_DATA_SIZE == 2000) -#define PERFORMANCE_RUN 1 -#else -#define VALIDATION_RUN 1 -#endif -#endif - -int ee_printf(const char *fmt, ...); - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/barebones/core_portme.mak b/benchmarks/riscv-coremark/coremark/barebones/core_portme.mak deleted file mode 100755 index 81594697d..000000000 --- a/benchmarks/riscv-coremark/coremark/barebones/core_portme.mak +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -#File : core_portme.mak - -# Flag : OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag : CC -# Use this flag to define compiler to use -CC = gcc -# Flag : LD -# Use this flag to define compiler to use -LD = gld -# Flag : AS -# Use this flag to define compiler to use -AS = gas -# Flag : CFLAGS -# Use this flag to define compiler options. 
Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O0 -g -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag : LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -SEPARATE_COMPILE=1 -# Flag : SEPARATE_COMPILE -# You must also define below how to create an object file, and how to link. -OBJOUT = -o -LFLAGS = -ASFLAGS = -OFLAG = -o -COUT = -c - -LFLAGS_END = -# Flag : PORT_SRCS -# Port specific source files can be added here -# You may also need cvt.c if the fcvt functions are not provided as intrinsics by your compiler! -PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/ee_printf.c -vpath %.c $(PORT_DIR) -vpath %.s $(PORT_DIR) - -# Flag : LOAD -# For a simple port, we assume self hosted compile and run, no load needed. - -# Flag : RUN -# For a simple port, we assume self hosted compile and run, simple invocation of the executable - -LOAD = echo "Please set LOAD to the process of loading the executable to the flash" -RUN = echo "Please set LOAD to the process of running the executable (e.g. via jtag, or board reset)" - -OEXT = .o -EXE = .bin - -$(OPATH)$(PORT_DIR)/%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -$(OPATH)$(PORT_DIR)/%$(OEXT) : %.s - $(AS) $(ASFLAGS) $< $(OBJOUT) $@ - -# Target : port_pre% and port_post% -# For the purpose of this simple port, no pre or post steps needed. - -.PHONY : port_prebuild port_postbuild port_prerun port_postrun port_preload port_postload -port_pre% port_post% : - -# FLAG : OPATH -# Path to the output folder. Default - current folder. 
-OPATH = ./ -MKDIR = mkdir -p - diff --git a/benchmarks/riscv-coremark/coremark/barebones/cvt.c b/benchmarks/riscv-coremark/coremark/barebones/cvt.c deleted file mode 100644 index 333e8ead2..000000000 --- a/benchmarks/riscv-coremark/coremark/barebones/cvt.c +++ /dev/null @@ -1,127 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -#include -#define CVTBUFSIZE 80 -static char CVTBUF[CVTBUFSIZE]; - -static char * -cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag) -{ - int r2; - double fi, fj; - char * p, *p1; - - if (ndigits < 0) - ndigits = 0; - if (ndigits >= CVTBUFSIZE - 1) - ndigits = CVTBUFSIZE - 2; - r2 = 0; - *sign = 0; - p = &buf[0]; - if (arg < 0) - { - *sign = 1; - arg = -arg; - } - arg = modf(arg, &fi); - p1 = &buf[CVTBUFSIZE]; - - if (fi != 0) - { - p1 = &buf[CVTBUFSIZE]; - while (fi != 0) - { - fj = modf(fi / 10, &fi); - *--p1 = (int)((fj + .03) * 10) + '0'; - r2++; - } - while (p1 < &buf[CVTBUFSIZE]) - *p++ = *p1++; - } - else if (arg > 0) - { - while ((fj = arg * 10) < 1) - { - arg = fj; - r2--; - } - } - p1 = &buf[ndigits]; - if (eflag == 0) - p1 += r2; - *decpt = r2; - if (p1 < &buf[0]) - { - buf[0] = '\0'; - return buf; - } - while (p <= p1 && p < &buf[CVTBUFSIZE]) - { - arg *= 10; - arg = modf(arg, &fj); - *p++ = (int)fj + '0'; - } - if (p1 >= &buf[CVTBUFSIZE]) - { - buf[CVTBUFSIZE - 1] = '\0'; - return buf; - } - p = p1; - *p1 += 5; - while (*p1 > '9') - { - *p1 = 
'0'; - if (p1 > buf) - ++*--p1; - else - { - *p1 = '1'; - (*decpt)++; - if (eflag == 0) - { - if (p > buf) - *p = '0'; - p++; - } - } - } - *p = '\0'; - return buf; -} - -char * -ecvt(double arg, int ndigits, int *decpt, int *sign) -{ - return cvt(arg, ndigits, decpt, sign, CVTBUF, 1); -} - -char * -ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf) -{ - return cvt(arg, ndigits, decpt, sign, buf, 1); -} - -char * -fcvt(double arg, int ndigits, int *decpt, int *sign) -{ - return cvt(arg, ndigits, decpt, sign, CVTBUF, 0); -} - -char * -fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf) -{ - return cvt(arg, ndigits, decpt, sign, buf, 0); -} diff --git a/benchmarks/riscv-coremark/coremark/barebones/ee_printf.c b/benchmarks/riscv-coremark/coremark/barebones/ee_printf.c deleted file mode 100644 index f2d362dc0..000000000 --- a/benchmarks/riscv-coremark/coremark/barebones/ee_printf.c +++ /dev/null @@ -1,700 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#include -#include - -#define ZEROPAD (1 << 0) /* Pad with zero */ -#define SIGN (1 << 1) /* Unsigned/signed long */ -#define PLUS (1 << 2) /* Show plus */ -#define SPACE (1 << 3) /* Spacer */ -#define LEFT (1 << 4) /* Left justified */ -#define HEX_PREP (1 << 5) /* 0x */ -#define UPPERCASE (1 << 6) /* 'ABCDEF' */ - -#define is_digit(c) ((c) >= '0' && (c) <= '9') - -static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz"; -static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -static ee_size_t strnlen(const char *s, ee_size_t count); - -static ee_size_t -strnlen(const char *s, ee_size_t count) -{ - const char *sc; - for (sc = s; *sc != '\0' && count--; ++sc) - ; - return sc - s; -} - -static int -skip_atoi(const char **s) -{ - int i = 0; - while (is_digit(**s)) - i = i * 10 + *((*s)++) - '0'; - return i; -} - -static char * -number(char *str, long num, int base, int size, int precision, int type) -{ - char c, sign, tmp[66]; - char *dig = digits; - int i; - - if (type & UPPERCASE) - dig = upper_digits; - if (type & LEFT) - type &= ~ZEROPAD; - if (base < 2 || base > 36) - return 0; - - c = (type & ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (type & SIGN) - { - if (num < 0) - { - sign = '-'; - num = -num; - size--; - } - else if (type & PLUS) - { - sign = '+'; - size--; - } - else if (type & SPACE) - { - sign = ' '; - size--; - } - } - - if (type & HEX_PREP) - { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - - i = 0; - - if (num == 0) - tmp[i++] = '0'; - else - { - while (num != 0) - { - tmp[i++] = dig[((unsigned long)num) % (unsigned)base]; - num = ((unsigned long)num) / (unsigned)base; - } - } - - if (i > precision) - precision = i; - size -= precision; - if (!(type & (ZEROPAD | LEFT))) - while (size-- > 0) - *str++ = ' '; - if (sign) - *str++ = sign; - - if (type & HEX_PREP) - { - if (base == 8) - *str++ = '0'; - else if (base == 16) - { - *str++ = '0'; - *str++ = digits[33]; - } - } - - if (!(type & LEFT)) - while (size-- > 0) - *str++ = c; - while (i < precision--) - *str++ = '0'; - while (i-- > 0) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - - return str; -} - -static char * -eaddr(char *str, unsigned char *addr, int size, int precision, int type) -{ - char tmp[24]; - char *dig = digits; - int i, len; - - if (type & UPPERCASE) - dig = upper_digits; - len = 0; - for (i = 0; i < 6; i++) - { - if (i != 0) - tmp[len++] = ':'; - tmp[len++] = dig[addr[i] >> 4]; - tmp[len++] = dig[addr[i] & 0x0F]; - } - - if (!(type & LEFT)) - while (len < size--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = tmp[i]; - while (len < size--) - *str++ = ' '; - - return str; -} - -static char * -iaddr(char *str, unsigned char *addr, int size, int precision, int type) -{ - char tmp[24]; - int i, n, len; - - len = 0; - for (i = 0; i < 4; i++) - { - if (i != 0) - tmp[len++] = '.'; - n = addr[i]; - - if (n == 0) - tmp[len++] = digits[0]; - else - { - if (n >= 100) - { - tmp[len++] = digits[n / 100]; - n = n % 100; - tmp[len++] = digits[n / 10]; - n = n % 10; - } - else if (n >= 10) - { - tmp[len++] = digits[n / 10]; - n = n % 10; - } - - tmp[len++] = 
digits[n]; - } - } - - if (!(type & LEFT)) - while (len < size--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = tmp[i]; - while (len < size--) - *str++ = ' '; - - return str; -} - -#if HAS_FLOAT - -char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); -char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); -static void ee_bufcpy(char *d, char *s, int count); - -void -ee_bufcpy(char *pd, char *ps, int count) -{ - char *pe = ps + count; - while (ps != pe) - *pd++ = *ps++; -} - -static void -parse_float(double value, char *buffer, char fmt, int precision) -{ - int decpt, sign, exp, pos; - char *digits = NULL; - char cvtbuf[80]; - int capexp = 0; - int magnitude; - - if (fmt == 'G' || fmt == 'E') - { - capexp = 1; - fmt += 'a' - 'A'; - } - - if (fmt == 'g') - { - digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf); - magnitude = decpt - 1; - if (magnitude < -4 || magnitude > precision - 1) - { - fmt = 'e'; - precision -= 1; - } - else - { - fmt = 'f'; - precision -= decpt; - } - } - - if (fmt == 'e') - { - digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf); - - if (sign) - *buffer++ = '-'; - *buffer++ = *digits; - if (precision > 0) - *buffer++ = '.'; - ee_bufcpy(buffer, digits + 1, precision); - buffer += precision; - *buffer++ = capexp ? 
'E' : 'e'; - - if (decpt == 0) - { - if (value == 0.0) - exp = 0; - else - exp = -1; - } - else - exp = decpt - 1; - - if (exp < 0) - { - *buffer++ = '-'; - exp = -exp; - } - else - *buffer++ = '+'; - - buffer[2] = (exp % 10) + '0'; - exp = exp / 10; - buffer[1] = (exp % 10) + '0'; - exp = exp / 10; - buffer[0] = (exp % 10) + '0'; - buffer += 3; - } - else if (fmt == 'f') - { - digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf); - if (sign) - *buffer++ = '-'; - if (*digits) - { - if (decpt <= 0) - { - *buffer++ = '0'; - *buffer++ = '.'; - for (pos = 0; pos < -decpt; pos++) - *buffer++ = '0'; - while (*digits) - *buffer++ = *digits++; - } - else - { - pos = 0; - while (*digits) - { - if (pos++ == decpt) - *buffer++ = '.'; - *buffer++ = *digits++; - } - } - } - else - { - *buffer++ = '0'; - if (precision > 0) - { - *buffer++ = '.'; - for (pos = 0; pos < precision; pos++) - *buffer++ = '0'; - } - } - } - - *buffer = '\0'; -} - -static void -decimal_point(char *buffer) -{ - while (*buffer) - { - if (*buffer == '.') - return; - if (*buffer == 'e' || *buffer == 'E') - break; - buffer++; - } - - if (*buffer) - { - int n = strnlen(buffer, 256); - while (n > 0) - { - buffer[n + 1] = buffer[n]; - n--; - } - - *buffer = '.'; - } - else - { - *buffer++ = '.'; - *buffer = '\0'; - } -} - -static void -cropzeros(char *buffer) -{ - char *stop; - - while (*buffer && *buffer != '.') - buffer++; - if (*buffer++) - { - while (*buffer && *buffer != 'e' && *buffer != 'E') - buffer++; - stop = buffer--; - while (*buffer == '0') - buffer--; - if (*buffer == '.') - buffer--; - while (buffer != stop) - *++buffer = 0; - } -} - -static char * -flt(char *str, double num, int size, int precision, char fmt, int flags) -{ - char tmp[80]; - char c, sign; - int n, i; - - // Left align means no zero padding - if (flags & LEFT) - flags &= ~ZEROPAD; - - // Determine padding and sign char - c = (flags & ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (flags & SIGN) - { - if (num < 0.0) - { - sign = '-'; - num = -num; - size--; - } - else if (flags & PLUS) - { - sign = '+'; - size--; - } - else if (flags & SPACE) - { - sign = ' '; - size--; - } - } - - // Compute the precision value - if (precision < 0) - precision = 6; // Default precision: 6 - - // Convert floating point number to text - parse_float(num, tmp, fmt, precision); - - if ((flags & HEX_PREP) && precision == 0) - decimal_point(tmp); - if (fmt == 'g' && !(flags & HEX_PREP)) - cropzeros(tmp); - - n = strnlen(tmp, 256); - - // Output number with alignment and padding - size -= n; - if (!(flags & (ZEROPAD | LEFT))) - while (size-- > 0) - *str++ = ' '; - if (sign) - *str++ = sign; - if (!(flags & LEFT)) - while (size-- > 0) - *str++ = c; - for (i = 0; i < n; i++) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - - return str; -} - -#endif - -static int -ee_vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long num; - int i, base; - char * str; - char * s; - - int flags; // Flags to number() - - int field_width; // Width of output field - int precision; // Min. 
# of digits for integers; max number of chars for - // from string - int qualifier; // 'h', 'l', or 'L' for integer fields - - for (str = buf; *fmt; fmt++) - { - if (*fmt != '%') - { - *str++ = *fmt; - continue; - } - - // Process flags - flags = 0; - repeat: - fmt++; // This also skips first '%' - switch (*fmt) - { - case '-': - flags |= LEFT; - goto repeat; - case '+': - flags |= PLUS; - goto repeat; - case ' ': - flags |= SPACE; - goto repeat; - case '#': - flags |= HEX_PREP; - goto repeat; - case '0': - flags |= ZEROPAD; - goto repeat; - } - - // Get field width - field_width = -1; - if (is_digit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') - { - fmt++; - field_width = va_arg(args, int); - if (field_width < 0) - { - field_width = -field_width; - flags |= LEFT; - } - } - - // Get the precision - precision = -1; - if (*fmt == '.') - { - ++fmt; - if (is_digit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') - { - ++fmt; - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - // Get the conversion qualifier - qualifier = -1; - if (*fmt == 'l' || *fmt == 'L') - { - qualifier = *fmt; - fmt++; - } - - // Default base - base = 10; - - switch (*fmt) - { - case 'c': - if (!(flags & LEFT)) - while (--field_width > 0) - *str++ = ' '; - *str++ = (unsigned char)va_arg(args, int); - while (--field_width > 0) - *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = ""; - len = strnlen(s, precision); - if (!(flags & LEFT)) - while (len < field_width--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = *s++; - while (len < field_width--) - *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) - { - field_width = 2 * sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, - (unsigned long)va_arg(args, void *), - 16, - field_width, - precision, - flags); - continue; - - case 'A': - flags |= UPPERCASE; - - case 'a': - if (qualifier == 'l') - str = eaddr(str, - va_arg(args, 
unsigned char *), - field_width, - precision, - flags); - else - str = iaddr(str, - va_arg(args, unsigned char *), - field_width, - precision, - flags); - continue; - - // Integer number formats - set up the flags and "break" - case 'o': - base = 8; - break; - - case 'X': - flags |= UPPERCASE; - - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - - case 'u': - break; - -#if HAS_FLOAT - - case 'f': - str = flt(str, - va_arg(args, double), - field_width, - precision, - *fmt, - flags | SIGN); - continue; - -#endif - - default: - if (*fmt != '%') - *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - - if (qualifier == 'l') - num = va_arg(args, unsigned long); - else if (flags & SIGN) - num = va_arg(args, int); - else - num = va_arg(args, unsigned int); - - str = number(str, num, base, field_width, precision, flags); - } - - *str = '\0'; - return str - buf; -} - -void -uart_send_char(char c) -{ -#error "You must implement the method uart_send_char to use this file!\n"; - /* Output of a char to a UART usually follows the following model: - Wait until UART is ready - Write char to UART - Wait until UART is done - - Or in code: - while (*UART_CONTROL_ADDRESS != UART_READY); - *UART_DATA_ADDRESS = c; - while (*UART_CONTROL_ADDRESS != UART_READY); - - Check the UART sample code on your platform or the board - documentation. - */ -} - -int -ee_printf(const char *fmt, ...) 
-{ - char buf[1024], *p; - va_list args; - int n = 0; - - va_start(args, fmt); - ee_vsprintf(buf, fmt, args); - va_end(args); - p = buf; - while (*p) - { - uart_send_char(*p); - n++; - p++; - } - - return n; -} diff --git a/benchmarks/riscv-coremark/coremark/core_list_join.c b/benchmarks/riscv-coremark/coremark/core_list_join.c deleted file mode 100644 index 4bffeeccf..000000000 --- a/benchmarks/riscv-coremark/coremark/core_list_join.c +++ /dev/null @@ -1,580 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include "coremark.h" -//#include -//#include -/* -Topic: Description - Benchmark using a linked list. - - Linked list is a common data structure used in many applications. - - For our purposes, this will excercise the memory units of the processor. - In particular, usage of the list pointers to find and alter data. - - We are not using Malloc since some platforms do not support this library. - - Instead, the memory block being passed in is used to create a list, - and the benchmark takes care not to add more items then can be - accomodated by the memory block. The porting layer will make sure - that we have a valid memory block. - - All operations are done in place, without using any extra memory. - - The list itself contains list pointers and pointers to data items. - Data items contain the following: - - idx - An index that captures the initial order of the list. 
- data - Variable data initialized based on the input parameters. The 16b are divided as follows: - o Upper 8b are backup of original data. - o Bit 7 indicates if the lower 7 bits are to be used as is or calculated. - o Bits 0-2 indicate type of operation to perform to get a 7b value. - o Bits 3-6 provide input for the operation. - -*/ - -/* local functions */ - -list_head *core_list_find(list_head *list,list_data *info); -list_head *core_list_reverse(list_head *list); -list_head *core_list_remove(list_head *item); -list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified); -list_head *core_list_insert_new(list_head *insert_point - , list_data *info, list_head **memblock, list_data **datablock - , list_head *memblock_end, list_data *datablock_end); -typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res); -list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res); - -ee_s16 calc_func(ee_s16 *pdata, core_results *res) { - ee_s16 data=*pdata; - ee_s16 retval; - ee_u8 optype=(data>>7) & 1; /* bit 7 indicates if the function result has been cached */ - if (optype) /* if cached, use cache */ - return (data & 0x007f); - else { /* otherwise calculate and cache the result */ - ee_s16 flag=data & 0x7; /* bits 0-2 is type of function to perform */ - ee_s16 dtype=((data>>3) & 0xf); /* bits 3-6 is specific data for the operation */ - dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */ - switch (flag) { - case 0: - if (dtype<0x22) /* set min period for bit corruption */ - dtype=0x22; - retval=core_bench_state(res->size,res->memblock[3],res->seed1,res->seed2,dtype,res->crc); - if (res->crcstate==0) - res->crcstate=retval; - break; - case 1: - retval=core_bench_matrix(&(res->mat),dtype,res->crc); - if (res->crcmatrix==0) - res->crcmatrix=retval; - break; - default: - retval=data; - break; - } - res->crc=crcu16(retval,res->crc); - retval &= 0x007f; - *pdata = (data & 0xff00) | 0x0080 | retval; 
/* cache the result */ - return retval; - } -} -/* Function: cmp_complex - Compare the data item in a list cell. - - Can be used by mergesort. -*/ -ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) { - ee_s16 val1=calc_func(&(a->data16),res); - ee_s16 val2=calc_func(&(b->data16),res); - return val1 - val2; -} - -/* Function: cmp_idx - Compare the idx item in a list cell, and regen the data. - - Can be used by mergesort. -*/ -ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) { - if (res==NULL) { - a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); - b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); - } - return a->idx - b->idx; -} - -/*void ehitoa(int value, char *str, int base){ - if (value>100000) strcpy(str,"too big"); - else{ - int places[6] = {100000, 10000, 1000, 100, 10, 1}; - int col; - int pv; - for(col = 0; col<6; col++){ - pv = 0; - while (value >= places[col]){ - value=value -places[col]; - pv++; - - } - str[col]=pv+'0'; - } - str[6]=0; - } -}*/ - -void copy_info(list_data *to,list_data *from) { - to->data16=from->data16; - to->idx=from->idx; -} - -/* Benchmark for linked list: - - Try to find multiple data items. 
- - List sort - - Operate on data from list (crc) - - Single remove/reinsert - * At the end of this function, the list is back to original state -*/ -ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) { - ee_u16 retval=0; - ee_u16 found=0,missed=0; - list_head *list=res->list; - ee_s16 find_num=res->seed3; - list_head *this_find; - list_head *finder, *remover; - list_data info; - ee_s16 i; - //ee_printf("entered corebenchlist \n"); - info.idx=finder_idx; - /* find values in the list, and change the list each time (reverse and cache if value found) */ - for (i=0; inext->info->data16 >> 8) & 1; - //ee_printf("if statement \n"); - } - else { - found++; - //ee_printf("else statement \n"); - if (this_find->info->data16 & 0x1) /* use found value */ - retval+=(this_find->info->data16 >> 9) & 1; - /* and cache next item at the head of the list (if any) */ - if (this_find->next != NULL) { - finder = this_find->next; - this_find->next = finder->next; - finder->next=list->next; - list->next=finder; - } - } - if (info.idx>=0) - info.idx++; -#if CORE_DEBUG - //ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found); -#endif - } - retval+=found*4-missed; - /* sort the list by data content and remove one item*/ - if (finder_idx>0) - list=core_list_mergesort(list,cmp_complex,res); - remover=core_list_remove(list->next); - /* CRC data content of list from location of index N forward, and then undo remove */ - finder=core_list_find(list,&info); - if (!finder) - finder=list->next; - while (finder) { - retval=crc16(list->info->data16,retval); - finder=finder->next; - } -#if CORE_DEBUG - //ee_printf("List sort 1: %04x\n",retval); -#endif - remover=core_list_undo_remove(remover,list->next); - /* sort the list by index, in effect returning the list to original state */ - list=core_list_mergesort(list,cmp_idx,NULL); - /* CRC data content of list */ - finder=list->next; - while (finder) { - retval=crc16(list->info->data16,retval); - finder=finder->next; - } -#if CORE_DEBUG 
- //ee_printf("List sort 2: %04x\n",retval); -#endif - return retval; -} -/* Function: core_list_init - Initialize list with data. - - Parameters: - blksize - Size of memory to be initialized. - memblock - Pointer to memory block. - seed - Actual values chosen depend on the seed parameter. - The seed parameter MUST be supplied from a source that cannot be determined at compile time - - Returns: - Pointer to the head of the list. - -*/ -list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) { - /* calculated pointers for the list */ - //ee_printf("%d \n blksize", blksize); - ee_u32 per_item=16+sizeof(struct list_data_s); - //ee_printf("%d \n sizeof", sizeof(struct list_data_s)); - //ee_printf("%d \n per_item", per_item); - ee_u32 size=(blksize/per_item)-2; - //char bufftwo[200]; - //ehitoa(size, bufftwo, 10); - //ee_printf(" size = %s done \n", bufftwo); - //ee_printf("%d", size);/* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */ - list_head *memblock_end=memblock+size; - - list_data *datablock=(list_data *)(memblock_end); - list_data *datablock_end=datablock+size; - //ee_printf("datablock_end"); - /* some useful variables */ - ee_u32 i; - list_head *finder,*list=memblock; - list_data info; - //ehitoa(size, bufftwo, 10); - //ee_printf(" size2 = %s done \n", bufftwo); - - /* create a fake items for the list head and tail */ - list->next=NULL; - list->info=datablock; - list->info->idx=0x0000; - list->info->data16=(ee_s16)0x8080; - memblock++; - datablock++; - info.idx=0x7fff; - info.data16=(ee_s16)0xffff; - //ehitoa(size, bufftwo, 10); - //ee_printf(" size3 = %s done \n", bufftwo); - core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); - //ehitoa(size, bufftwo, 10); - //ee_printf(" size4 = %s done \n", bufftwo);; - /* then insert size items */ - for (i=0; inext; - i=1; - //ehitoa(i, bufftwo, 10); - //ee_printf(" i = %s done \n", bufftwo); - while 
(finder->next!=NULL) { - //ee_printf("enter while statement \n"); - if (iinfo->idx=i++; - //ehitoa(i, bufftwo, 10); - //ee_printf(" if i = %s done \n", bufftwo); - } - - else { - ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */ - finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */ - //ehitoa(i, bufftwo, 10); - //ee_printf(" else i = %s done \n", bufftwo); - } - finder=finder->next; - } - //ehitoa(i, bufftwo, 10); - //ee_printf(" i2 = %s done \n", bufftwo); - list = core_list_mergesort(list,cmp_idx,NULL); -#if CORE_DEBUG - //ee_printf("Initialized list:\n"); - finder=list; - while (finder) { - //ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16); - finder=finder->next; - } - //ee_printf("\n"); -#endif - return list; -} - -/* Function: core_list_insert - Insert an item to the list - - Parameters: - insert_point - where to insert the item. - info - data for the cell. - memblock - pointer for the list header - datablock - pointer for the list data - memblock_end - end of region for list headers - datablock_end - end of region for list data - - Returns: - Pointer to new item. -*/ -list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock - , list_head *memblock_end, list_data *datablock_end) { - list_head *newitem; - - if ((*memblock+1) >= memblock_end) - return NULL; - if ((*datablock+1) >= datablock_end) - return NULL; - - newitem=*memblock; - (*memblock)++; - newitem->next=insert_point->next; - insert_point->next=newitem; - - newitem->info=*datablock; - (*datablock)++; - copy_info(newitem->info,info); - - return newitem; -} - -/* Function: core_list_remove - Remove an item from the list. - - Operation: - For a singly linked list, remove by copying the data from the next item - over to the current cell, and unlinking the next item. 
- - Note: - since there is always a fake item at the end of the list, no need to check for NULL. - - Returns: - Removed item. -*/ -list_head *core_list_remove(list_head *item) { - list_data *tmp; - list_head *ret=item->next; - /* swap data pointers */ - tmp=item->info; - item->info=ret->info; - ret->info=tmp; - /* and eliminate item */ - item->next=item->next->next; - ret->next=NULL; - return ret; -} - -/* Function: core_list_undo_remove - Undo a remove operation. - - Operation: - Since we want each iteration of the benchmark to be exactly the same, - we need to be able to undo a remove. - Link the removed item back into the list, and switch the info items. - - Parameters: - item_removed - Return value from the - item_modified - List item that was modified during - - Returns: - The item that was linked back to the list. - -*/ -list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) { - list_data *tmp; - /* swap data pointers */ - tmp=item_removed->info; - item_removed->info=item_modified->info; - item_modified->info=tmp; - /* and insert item */ - item_removed->next=item_modified->next; - item_modified->next=item_removed; - return item_removed; -} - -/* Function: core_list_find - Find an item in the list - - Operation: - Find an item by idx (if not 0) or specific data value - - Parameters: - list - list head - info - idx or data to find - - Returns: - Found item, or NULL if not found. 
-*/ -list_head *core_list_find(list_head *list,list_data *info) { - //ee_printf("entered core_list_find \n"); - if (info->idx>=0) { - //ee_printf("find if \n"); - while (list && (list->info->idx != info->idx)){ - list=list->next; - //ee_printf("find while if \n"); - } - //ee_printf("core_list_find end \n"); - return list; - } else { - //ee_printf("find else"); - while (list && ((list->info->data16 & 0xff) != info->data16)){ - list=list->next; - //ee_printf("find while else \n"); - } - //ee_printf("core list find end \n"); - return list; - } -} -/* Function: core_list_reverse - Reverse a list - - Operation: - Rearrange the pointers so the list is reversed. - - Parameters: - list - list head - info - idx or data to find - - Returns: - Found item, or NULL if not found. -*/ - -list_head *core_list_reverse(list_head *list) { -// ee_printf("entered core_list_reverse"); - list_head *next=NULL, *tmp; - while (list) { - tmp=list->next; - list->next=next; - next=list; - list=tmp; - } - //ee_printf("core_list_reverse done"); - return next; -} -/* Function: core_list_mergesort - Sort the list in place without recursion. - - Description: - Use mergesort, as for linked list this is a realistic solution. - Also, since this is aimed at embedded, care was taken to use iterative rather then recursive algorithm. - The sort can either return the list to original order (by idx) , - or use the data item to invoke other other algorithms and change the order of the list. - - Parameters: - list - list to be sorted. - cmp - cmp function to use - - Returns: - New head of the list. - - Note: - We have a special header for the list that will always be first, - but the algorithm could theoretically modify where the list starts. 
- - */ -list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) { - list_head *p, *q, *e, *tail; - ee_s32 insize, nmerges, psize, qsize, i; - - insize = 1; - //char bufftwo[200]; - while (1) { - p = list; - list = NULL; - tail = NULL; - - nmerges = 0; /* count number of merges we do in this pass */ - //ehitoa(nmerges, bufftwo, 10); - //ee_printf(" nmerges default value = %s done \n", bufftwo); - while (p) { - nmerges++; /* there exists a merge to be done */ - //ehitoa(nmerges, bufftwo, 10); - //ee_printf(" current nmerges = %s done \n", bufftwo); - /* step `insize' places along from p */ - q = p; - psize = 0; - //ehitoa(insize, bufftwo, 10); - //ee_printf(" insize = %s done \n", bufftwo); - for (i = 0; i < insize; i++) { - //ehitoa(i, bufftwo, 10); - //ee_printf(" i = %s done \n", bufftwo); - psize++; - q = q->next; - if (!q) break; - } - - /* if q hasn't fallen off end, we have two lists to merge */ - qsize = insize; - //ehitoa(qsize, bufftwo, 10); - //ee_printf(" qsize = %s done \n", bufftwo); - - /* now we have two lists; merge them */ - while (psize > 0 || (qsize > 0 && q)) { - - /* decide whether next element of merge comes from p or q */ - if (psize == 0) { - //ee_printf("if \n"); - /* p is empty; e must come from q. */ - e = q; q = q->next; qsize--; - } else if (qsize == 0 || !q) { - //ee_printf("else if \n"); - /* q is empty; e must come from p. */ - e = p; p = p->next; psize--; - } else if (cmp(p->info,q->info,res) <= 0) { - //ee_printf("else if 2 \n"); - /* First element of p is lower (or same); e must come from p. */ - e = p; p = p->next; psize--; - } else { - //ee_printf("else \n"); - /* First element of q is lower; e must come from q. 
*/ - e = q; q = q->next; qsize--; - } - - /* add the next element to the merged list */ - if (tail) { - //ee_printf("tail if \n"); - tail->next = e; - } else { - //ee_printf("tail else \n"); - list = e; - } - tail = e; - } - - /* now p has stepped `insize' places along, and q has too */ - p = q; - } - - tail->next = NULL; - - /* If we have done only one merge, we're finished. */ - if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ - return list; - - /* Otherwise repeat, merging lists twice the size */ - insize *= 2; - //ehitoa(insize, bufftwo, 10); - //ee_printf(" insize2 = %s done \n", bufftwo); - } -#if COMPILER_REQUIRES_SORT_RETURN - return list; -#endif -} diff --git a/benchmarks/riscv-coremark/coremark/core_main.c b/benchmarks/riscv-coremark/coremark/core_main.c deleted file mode 100644 index 8467d9459..000000000 --- a/benchmarks/riscv-coremark/coremark/core_main.c +++ /dev/null @@ -1,448 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* File: core_main.c - This file contains the framework to acquire a block of memory, seed - initial parameters, tun t he benchmark and report the results. -*/ -#include "coremark.h" - -/* Function: iterate - Run the benchmark for a specified number of iterations. - - Operation: - For each type of benchmarked algorithm: - a - Initialize the data block for the algorithm. - b - Execute the algorithm N times. 
- - Returns: - NULL. -*/ -static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0, - (ee_u16)0x3340, - (ee_u16)0x6a79, - (ee_u16)0xe714, - (ee_u16)0xe3c1 }; -static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52, - (ee_u16)0x1199, - (ee_u16)0x5608, - (ee_u16)0x1fd7, - (ee_u16)0x0747 }; -static ee_u16 state_known_crc[] = { (ee_u16)0x5e47, - (ee_u16)0x39bf, - (ee_u16)0xe5a4, - (ee_u16)0x8e3a, - (ee_u16)0x8d84 }; -void * -iterate(void *pres) -{ - ee_u32 i; - ee_u16 crc; - core_results *res = (core_results *)pres; - ee_u32 iterations = res->iterations; - res->crc = 0; - res->crclist = 0; - res->crcmatrix = 0; - res->crcstate = 0; - - for (i = 0; i < iterations; i++) - { - crc = core_bench_list(res, 1); - res->crc = crcu16(crc, res->crc); - crc = core_bench_list(res, -1); - res->crc = crcu16(crc, res->crc); - if (i == 0) - res->crclist = res->crc; - } - return NULL; -} - -#if (SEED_METHOD == SEED_ARG) -ee_s32 get_seed_args(int i, int argc, char *argv[]); -#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv) -#define get_seed_32(x) get_seed_args(x, argc, argv) -#else /* via function or volatile */ -ee_s32 get_seed_32(int i); -#define get_seed(x) (ee_s16) get_seed_32(x) -#endif - -#if (MEM_METHOD == MEM_STATIC) -ee_u8 static_memblk[TOTAL_DATA_SIZE]; -#endif -char *mem_name[3] = { "Static", "Heap", "Stack" }; -/* Function: main - Main entry routine for the benchmark. - This function is responsible for the following steps: - - 1 - Initialize input seeds from a source that cannot be determined at - compile time. 2 - Initialize memory block for use. 3 - Run and time the - benchmark. 4 - Report results, testing the validity of the output if the - seeds are known. - - Arguments: - 1 - first seed : Any value - 2 - second seed : Must be identical to first for iterations to be - identical 3 - third seed : Any value, should be at least an order of - magnitude less then the input size, but bigger then 32. 
4 - Iterations : - Special, if set to 0, iterations will be automatically determined such that - the benchmark will run between 10 to 100 secs - -*/ - -#if MAIN_HAS_NOARGC -MAIN_RETURN_TYPE -main(void) -{ - int argc = 0; - char *argv[1]; -#else -MAIN_RETURN_TYPE -main(int argc, char *argv[]) -{ -#endif - ee_printf("SHOWTIME\n"); - ee_u16 i, j = 0, num_algorithms = 0; - ee_s16 known_id = -1, total_errors = 0; - ee_u16 seedcrc = 0; - CORE_TICKS total_time; - core_results results[MULTITHREAD]; -#if (MEM_METHOD == MEM_STACK) - ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD]; -#endif - /* first call any initializations needed */ - portable_init(&(results[0].port), &argc, argv); - /* First some checks to make sure benchmark will run ok */ - if (sizeof(struct list_head_s) > 128) - { - ee_printf("list_head structure too big for comparable data!\n"); - return MAIN_RETURN_VAL; - } - results[0].seed1 = get_seed(1); - results[0].seed2 = get_seed(2); - results[0].seed3 = get_seed(3); - results[0].iterations = get_seed_32(4); -#if CORE_DEBUG - results[0].iterations = 1; -#endif - results[0].execs = get_seed_32(5); - if (results[0].execs == 0) - { /* if not supplied, execute all algorithms */ - results[0].execs = ALL_ALGORITHMS_MASK; - } - /* put in some default values based on one seed only for easy testing */ - if ((results[0].seed1 == 0) && (results[0].seed2 == 0) - && (results[0].seed3 == 0)) - { /* perfromance run */ - results[0].seed1 = 0; - results[0].seed2 = 0; - results[0].seed3 = 0x66; - } - if ((results[0].seed1 == 1) && (results[0].seed2 == 0) - && (results[0].seed3 == 0)) - { /* validation run */ - results[0].seed1 = 0x3415; - results[0].seed2 = 0x3415; - results[0].seed3 = 0x66; - } -#if (MEM_METHOD == MEM_STATIC) - results[0].memblock[0] = (void *)static_memblk; - results[0].size = TOTAL_DATA_SIZE; - results[0].err = 0; -#if (MULTITHREAD > 1) -#error "Cannot use a static data area with multiple contexts!" 
-#endif -#elif (MEM_METHOD == MEM_MALLOC) - for (i = 0; i < MULTITHREAD; i++) - { - ee_s32 malloc_override = get_seed(7); - if (malloc_override != 0) - results[i].size = malloc_override; - else - results[i].size = TOTAL_DATA_SIZE; - results[i].memblock[0] = portable_malloc(results[i].size); - results[i].seed1 = results[0].seed1; - results[i].seed2 = results[0].seed2; - results[i].seed3 = results[0].seed3; - results[i].err = 0; - results[i].execs = results[0].execs; - } -#elif (MEM_METHOD == MEM_STACK) -for (i = 0; i < MULTITHREAD; i++) -{ - results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE; - results[i].size = TOTAL_DATA_SIZE; - results[i].seed1 = results[0].seed1; - results[i].seed2 = results[0].seed2; - results[i].seed3 = results[0].seed3; - results[i].err = 0; - results[i].execs = results[0].execs; -} -#else -#error "Please define a way to initialize a memory block." -#endif - /* Data init */ - /* Find out how space much we have based on number of algorithms */ - for (i = 0; i < NUM_ALGORITHMS; i++) - { - if ((1 << (ee_u32)i) & results[0].execs) - num_algorithms++; - } - for (i = 0; i < MULTITHREAD; i++) - results[i].size = results[i].size / num_algorithms; - /* Assign pointers */ - for (i = 0; i < NUM_ALGORITHMS; i++) - { - ee_u32 ctx; - if ((1 << (ee_u32)i) & results[0].execs) - { - for (ctx = 0; ctx < MULTITHREAD; ctx++) - results[ctx].memblock[i + 1] - = (char *)(results[ctx].memblock[0]) + results[0].size * j; - j++; - } - } - /* call inits */ - for (i = 0; i < MULTITHREAD; i++) - { - if (results[i].execs & ID_LIST) - { - results[i].list = core_list_init( - results[0].size, results[i].memblock[1], results[i].seed1); - } - if (results[i].execs & ID_MATRIX) - { - core_init_matrix(results[0].size, - results[i].memblock[2], - (ee_s32)results[i].seed1 - | (((ee_s32)results[i].seed2) << 16), - &(results[i].mat)); - } - if (results[i].execs & ID_STATE) - { - core_init_state( - results[0].size, results[i].seed1, results[i].memblock[3]); - } - } - - /* 
automatically determine number of iterations if not set */ - // results[0].iterations = 2; // temporary for speed - if (results[0].iterations == 0) - { - secs_ret secs_passed = 0; - ee_u32 divisor; - results[0].iterations = 1; - while (secs_passed < (secs_ret)1) - { - results[0].iterations *= 10; - start_time(); - iterate(&results[0]); - stop_time(); - secs_passed = time_in_secs(get_time()); - } - /* now we know it executes for at least 1 sec, set actual run time at - * about 10 secs */ - divisor = (ee_u32)secs_passed; - if (divisor == 0) /* some machines cast float to int as 0 since this - conversion is not defined by ANSI, but we know at - least one second passed */ - divisor = 1; - results[0].iterations *= 1 + 10 / divisor; - } - /* perform actual benchmark */ - start_time(); -#if (MULTITHREAD > 1) - if (default_num_contexts > MULTITHREAD) - { - default_num_contexts = MULTITHREAD; - } - for (i = 0; i < default_num_contexts; i++) - { - results[i].iterations = results[0].iterations; - results[i].execs = results[0].execs; - core_start_parallel(&results[i]); - } - for (i = 0; i < default_num_contexts; i++) - { - core_stop_parallel(&results[i]); - } -#else - iterate(&results[0]); -#endif - stop_time(); - total_time = get_time(); - /* get a function of the input to report */ - seedcrc = crc16(results[0].seed1, seedcrc); - seedcrc = crc16(results[0].seed2, seedcrc); - seedcrc = crc16(results[0].seed3, seedcrc); - seedcrc = crc16(results[0].size, seedcrc); - - switch (seedcrc) - { /* test known output for common seeds */ - case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */ - known_id = 0; - ee_printf("6k performance run parameters for coremark.\n"); - break; - case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per - algorithm */ - known_id = 1; - ee_printf("6k validation run parameters for coremark.\n"); - break; - case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm - */ - known_id = 2; - ee_printf("Profile 
generation run parameters for coremark.\n"); - break; - case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */ - known_id = 3; - ee_printf("2K performance run parameters for coremark.\n"); - break; - case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per - algorithm */ - known_id = 4; - ee_printf("2K validation run parameters for coremark.\n"); - break; - default: - total_errors = -1; - break; - } - if (known_id >= 0) - { - for (i = 0; i < default_num_contexts; i++) - { - results[i].err = 0; - if ((results[i].execs & ID_LIST) - && (results[i].crclist != list_known_crc[known_id])) - { - ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n", - i, - results[i].crclist, - list_known_crc[known_id]); - results[i].err++; - } - if ((results[i].execs & ID_MATRIX) - && (results[i].crcmatrix != matrix_known_crc[known_id])) - { - ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n", - i, - results[i].crcmatrix, - matrix_known_crc[known_id]); - results[i].err++; - } - if ((results[i].execs & ID_STATE) - && (results[i].crcstate != state_known_crc[known_id])) - { - ee_printf("[%u]ERROR! 
state crc 0x%04x - should be 0x%04x\n", - i, - results[i].crcstate, - state_known_crc[known_id]); - results[i].err++; - } - total_errors += results[i].err; - } - } - total_errors += check_data_types(); - /* and report results */ - ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size); - ee_printf("Total ticks : %lu\n", (long unsigned)total_time); -#if HAS_FLOAT - ee_printf("Total time (msecs): %ld\n", (long unsigned)(1000*time_in_secs(total_time))); - if (time_in_secs(total_time) > 0) - ee_printf("Iterations/Sec : %lu / %lu\n", - default_num_contexts * results[0].iterations, - total_time); -#else - ee_printf("Total time (secs): %d\n", time_in_secs(total_time)); - if (time_in_secs(total_time) > 0) - ee_printf("Iterations/Sec : %d\n", - default_num_contexts * results[0].iterations - / time_in_secs(total_time)); -#endif - if (time_in_secs(total_time) < 10) - { - ee_printf( - "ERROR! Must execute for at least 10 secs for a valid result!\n"); - total_errors++; - } - - ee_printf("Iterations : %lu\n", - (long unsigned)default_num_contexts * results[0].iterations); - ee_printf("Compiler version : %s\n", COMPILER_VERSION); - ee_printf("Compiler flags : %s\n", COMPILER_FLAGS); -#if (MULTITHREAD > 1) - ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts); -#endif - ee_printf("Memory location : %s\n", MEM_LOCATION); - /* output for verification */ - ee_printf("seedcrc : 0x%04x\n", seedcrc); - if (results[0].execs & ID_LIST) - for (i = 0; i < default_num_contexts; i++) - ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist); - if (results[0].execs & ID_MATRIX) - for (i = 0; i < default_num_contexts; i++) - ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix); - if (results[0].execs & ID_STATE) - for (i = 0; i < default_num_contexts; i++) - ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate); - for (i = 0; i < default_num_contexts; i++) - ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc); - if (total_errors == 0) - { - 
ee_printf( - "Correct operation validated. See README.md for run and reporting " - "rules.\n"); -#if HAS_FLOAT - if (known_id == 3) - { - unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time); - secs_ret totalmsecs = time_in_secs(total_time); - int totalmint = (int) totalmsecs; - ee_printf("ELAPSED TIME: %d\n", totalmint); - - ee_printf("CoreMark 1.0 : %d / %s %s", - tmp, - COMPILER_VERSION, - COMPILER_FLAGS); -#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC) - ee_printf(" / %s", MEM_LOCATION); -#else - ee_printf(" / %s", mem_name[MEM_METHOD]); -#endif - -#if (MULTITHREAD > 1) - ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD); -#endif - ee_printf("\n"); - } -#endif - } - if (total_errors > 0) - ee_printf("Errors detected\n"); - if (total_errors < 0) - ee_printf( - "Cannot validate operation for these seed values, please compare " - "with results on a known platform.\n"); - -#if (MEM_METHOD == MEM_MALLOC) - for (i = 0; i < MULTITHREAD; i++) - portable_free(results[i].memblock[0]); -#endif - /* And last call any target specific code for finalizing */ - portable_fini(&(results[0].port)); - - return MAIN_RETURN_VAL; -} diff --git a/benchmarks/riscv-coremark/coremark/core_matrix.c b/benchmarks/riscv-coremark/coremark/core_matrix.c deleted file mode 100644 index 29fd8ab45..000000000 --- a/benchmarks/riscv-coremark/coremark/core_matrix.c +++ /dev/null @@ -1,359 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include "coremark.h" -/* -Topic: Description - Matrix manipulation benchmark - - This very simple algorithm forms the basis of many more complex -algorithms. - - The tight inner loop is the focus of many optimizations (compiler as -well as hardware based) and is thus relevant for embedded processing. - - The total available data space will be divided to 3 parts: - NxN Matrix A - initialized with small values (upper 3/4 of the bits all -zero). NxN Matrix B - initialized with medium values (upper half of the bits all -zero). NxN Matrix C - used for the result. - - The actual values for A and B must be derived based on input that is not -available at compile time. -*/ -ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val); -ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval); -void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val); -void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); -void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); -void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); -void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val); - -#define matrix_test_next(x) (x + 1) -#define matrix_clip(x, y) ((y) ? 
(x)&0x0ff : (x)&0x0ffff) -#define matrix_big(x) (0xf000 | (x)) -#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to)))) - -#if CORE_DEBUG -void -printmat(MATDAT *A, ee_u32 N, char *name) -{ - ee_u32 i, j; - ee_printf("Matrix %s [%dx%d]:\n", name, N, N); - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - if (j != 0) - ee_printf(","); - ee_printf("%d", A[i * N + j]); - } - ee_printf("\n"); - } -} -void -printmatC(MATRES *C, ee_u32 N, char *name) -{ - ee_u32 i, j; - ee_printf("Matrix %s [%dx%d]:\n", name, N, N); - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - if (j != 0) - ee_printf(","); - ee_printf("%d", C[i * N + j]); - } - ee_printf("\n"); - } -} -#endif -/* Function: core_bench_matrix - Benchmark function - - Iterate N times, - changing the matrix values slightly by a constant amount each time. -*/ -ee_u16 -core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) -{ - ee_u32 N = p->N; - MATRES *C = p->C; - MATDAT *A = p->A; - MATDAT *B = p->B; - MATDAT val = (MATDAT)seed; - - crc = crc16(matrix_test(N, C, A, B, val), crc); - - return crc; -} - -/* Function: matrix_test - Perform matrix manipulation. - - Parameters: - N - Dimensions of the matrix. - C - memory for result matrix. - A - input matrix - B - operator matrix (not changed during operations) - - Returns: - A CRC value that captures all results calculated in the function. - In particular, crc of the value calculated on the result matrix - after each step by . - - Operation: - - 1 - Add a constant value to all elements of a matrix. - 2 - Multiply a matrix by a constant. - 3 - Multiply a matrix by a vector. - 4 - Multiply a matrix by a matrix. - 5 - Add a constant value to all elements of a matrix. - - After the last step, matrix A is back to original contents. 
-*/ -ee_s16 -matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) -{ - ee_u16 crc = 0; - MATDAT clipval = matrix_big(val); - - matrix_add_const(N, A, val); /* make sure data changes */ -#if CORE_DEBUG - printmat(A, N, "matrix_add_const"); -#endif - matrix_mul_const(N, C, A, val); - crc = crc16(matrix_sum(N, C, clipval), crc); -#if CORE_DEBUG - printmatC(C, N, "matrix_mul_const"); -#endif - matrix_mul_vect(N, C, A, B); - crc = crc16(matrix_sum(N, C, clipval), crc); -#if CORE_DEBUG - printmatC(C, N, "matrix_mul_vect"); -#endif - matrix_mul_matrix(N, C, A, B); - crc = crc16(matrix_sum(N, C, clipval), crc); -#if CORE_DEBUG - printmatC(C, N, "matrix_mul_matrix"); -#endif - matrix_mul_matrix_bitextract(N, C, A, B); - crc = crc16(matrix_sum(N, C, clipval), crc); -#if CORE_DEBUG - printmatC(C, N, "matrix_mul_matrix_bitextract"); -#endif - - matrix_add_const(N, A, -val); /* return matrix to initial value */ - return crc; -} - -/* Function : matrix_init - Initialize the memory block for matrix benchmarking. - - Parameters: - blksize - Size of memory to be initialized. - memblk - Pointer to memory block. - seed - Actual values chosen depend on the seed parameter. - p - pointers to containing initialized matrixes. - - Returns: - Matrix dimensions. 
- - Note: - The seed parameter MUST be supplied from a source that cannot be - determined at compile time -*/ -ee_u32 -core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) -{ - ee_u32 N = 0; - MATDAT *A; - MATDAT *B; - ee_s32 order = 1; - MATDAT val; - ee_u32 i = 0, j = 0; - if (seed == 0) - seed = 1; - while (j < blksize) - { - i++; - j = i * i * 2 * 4; - } - N = i - 1; - A = (MATDAT *)align_mem(memblk); - B = A + N * N; - - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - seed = ((order * seed) % 65536); - val = (seed + order); - val = matrix_clip(val, 0); - B[i * N + j] = val; - val = (val + order); - val = matrix_clip(val, 1); - A[i * N + j] = val; - order++; - } - } - - p->A = A; - p->B = B; - p->C = (MATRES *)align_mem(B + N * N); - p->N = N; -#if CORE_DEBUG - printmat(A, N, "A"); - printmat(B, N, "B"); -#endif - return N; -} - -/* Function: matrix_sum - Calculate a function that depends on the values of elements in the - matrix. - - For each element, accumulate into a temporary variable. - - As long as this value is under the parameter clipval, - add 1 to the result if the element is bigger then the previous. - - Otherwise, reset the accumulator and add 10 to the result. -*/ -ee_s16 -matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) -{ - MATRES tmp = 0, prev = 0, cur = 0; - ee_s16 ret = 0; - ee_u32 i, j; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - cur = C[i * N + j]; - tmp += cur; - if (tmp > clipval) - { - ret += 10; - tmp = 0; - } - else - { - ret += (cur > prev) ? 1 : 0; - } - prev = cur; - } - } - return ret; -} - -/* Function: matrix_mul_const - Multiply a matrix by a constant. - This could be used as a scaler for instance. 
-*/ -void -matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) -{ - ee_u32 i, j; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val; - } - } -} - -/* Function: matrix_add_const - Add a constant value to all elements of a matrix. -*/ -void -matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) -{ - ee_u32 i, j; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - A[i * N + j] += val; - } - } -} - -/* Function: matrix_mul_vect - Multiply a matrix by a vector. - This is common in many simple filters (e.g. fir where a vector of - coefficients is applied to the matrix.) -*/ -void -matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) -{ - ee_u32 i, j; - for (i = 0; i < N; i++) - { - C[i] = 0; - for (j = 0; j < N; j++) - { - C[i] += (MATRES)A[i * N + j] * (MATRES)B[j]; - } - } -} - -/* Function: matrix_mul_matrix - Multiply a matrix by a matrix. - Basic code is used in many algorithms, mostly with minor changes such as - scaling. -*/ -void -matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) -{ - ee_u32 i, j, k; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - C[i * N + j] = 0; - for (k = 0; k < N; k++) - { - C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; - } - } - } -} - -/* Function: matrix_mul_matrix_bitextract - Multiply a matrix by a matrix, and extract some bits from the result. - Basic code is used in many algorithms, mostly with minor changes such as - scaling. 
-*/ -void -matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) -{ - ee_u32 i, j, k; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - C[i * N + j] = 0; - for (k = 0; k < N; k++) - { - MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; - C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7); - } - } - } -} diff --git a/benchmarks/riscv-coremark/coremark/core_state.c b/benchmarks/riscv-coremark/coremark/core_state.c deleted file mode 100644 index 6dbab9dd9..000000000 --- a/benchmarks/riscv-coremark/coremark/core_state.c +++ /dev/null @@ -1,330 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include "coremark.h" -/* local functions */ -enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count); - -/* -Topic: Description - Simple state machines like this one are used in many embedded products. - - For more complex state machines, sometimes a state transition table -implementation is used instead, trading speed of direct coding for ease of -maintenance. - - Since the main goal of using a state machine in CoreMark is to excercise -the switch/if behaviour, we are using a small moore machine. - - In particular, this machine tests type of string input, - trying to determine whether the input is a number or something else. - (see core_state.png). 
-*/ - -/* Function: core_bench_state - Benchmark function - - Go over the input twice, once direct, and once after introducing some - corruption. -*/ -ee_u16 -core_bench_state(ee_u32 blksize, - ee_u8 *memblock, - ee_s16 seed1, - ee_s16 seed2, - ee_s16 step, - ee_u16 crc) -{ - ee_u32 final_counts[NUM_CORE_STATES]; - ee_u32 track_counts[NUM_CORE_STATES]; - ee_u8 *p = memblock; - ee_u32 i; - -#if CORE_DEBUG - ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc); -#endif - for (i = 0; i < NUM_CORE_STATES; i++) - { - final_counts[i] = track_counts[i] = 0; - } - /* run the state machine over the input */ - while (*p != 0) - { - enum CORE_STATE fstate = core_state_transition(&p, track_counts); - final_counts[fstate]++; -#if CORE_DEBUG - ee_printf("%d,", fstate); - } - ee_printf("\n"); -#else - } -#endif - p = memblock; - while (p < (memblock + blksize)) - { /* insert some corruption */ - if (*p != ',') - *p ^= (ee_u8)seed1; - p += step; - } - p = memblock; - /* run the state machine over the input again */ - while (*p != 0) - { - enum CORE_STATE fstate = core_state_transition(&p, track_counts); - final_counts[fstate]++; -#if CORE_DEBUG - ee_printf("%d,", fstate); - } - ee_printf("\n"); -#else - } -#endif - p = memblock; - while (p < (memblock + blksize)) - { /* undo corruption is seed1 and seed2 are equal */ - if (*p != ',') - *p ^= (ee_u8)seed2; - p += step; - } - /* end timing */ - for (i = 0; i < NUM_CORE_STATES; i++) - { - crc = crcu32(final_counts[i], crc); - crc = crcu32(track_counts[i], crc); - } - return crc; -} - -/* Default initialization patterns */ -static ee_u8 *intpat[4] - = { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" }; -static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400", - (ee_u8 *)".1234500", - (ee_u8 *)"-110.700", - (ee_u8 *)"+0.64400" }; -static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3", - (ee_u8 *)"-.123e-2", - (ee_u8 *)"-87e+832", - (ee_u8 *)"+0.6e-12" }; -static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F", - (ee_u8 
*)"-T.T++Tq", - (ee_u8 *)"1T3.4e4z", - (ee_u8 *)"34.0e-T^" }; - -/* Function: core_init_state - Initialize the input data for the state machine. - - Populate the input with several predetermined strings, interspersed. - Actual patterns chosen depend on the seed parameter. - - Note: - The seed parameter MUST be supplied from a source that cannot be - determined at compile time -*/ -void -core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) -{ - ee_u32 total = 0, next = 0, i; - ee_u8 *buf = 0; -#if CORE_DEBUG - ee_u8 *start = p; - ee_printf("State: %d,%d\n", size, seed); -#endif - size--; - next = 0; - while ((total + next + 1) < size) - { - if (next > 0) - { - for (i = 0; i < next; i++) - *(p + total + i) = buf[i]; - *(p + total + i) = ','; - total += next + 1; - } - seed++; - switch (seed & 0x7) - { - case 0: /* int */ - case 1: /* int */ - case 2: /* int */ - buf = intpat[(seed >> 3) & 0x3]; - next = 4; - break; - case 3: /* float */ - case 4: /* float */ - buf = floatpat[(seed >> 3) & 0x3]; - next = 8; - break; - case 5: /* scientific */ - case 6: /* scientific */ - buf = scipat[(seed >> 3) & 0x3]; - next = 8; - break; - case 7: /* invalid */ - buf = errpat[(seed >> 3) & 0x3]; - next = 8; - break; - default: /* Never happen, just to make some compilers happy */ - break; - } - } - size++; - while (total < size) - { /* fill the rest with 0 */ - *(p + total) = 0; - total++; - } -#if CORE_DEBUG - ee_printf("State Input: %s\n", start); -#endif -} - -static ee_u8 -ee_isdigit(ee_u8 c) -{ - ee_u8 retval; - retval = ((c >= '0') & (c <= '9')) ? 1 : 0; - return retval; -} - -/* Function: core_state_transition - Actual state machine. - - The state machine will continue scanning until either: - 1 - an invalid input is detcted. - 2 - a valid number has been detected. - - The input pointer is updated to point to the end of the token, and the - end state is returned (either specific format determined or invalid). 
-*/ - -enum CORE_STATE -core_state_transition(ee_u8 **instr, ee_u32 *transition_count) -{ - ee_u8 * str = *instr; - ee_u8 NEXT_SYMBOL; - enum CORE_STATE state = CORE_START; - for (; *str && state != CORE_INVALID; str++) - { - NEXT_SYMBOL = *str; - if (NEXT_SYMBOL == ',') /* end of this input */ - { - str++; - break; - } - switch (state) - { - case CORE_START: - if (ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_INT; - } - else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-') - { - state = CORE_S1; - } - else if (NEXT_SYMBOL == '.') - { - state = CORE_FLOAT; - } - else - { - state = CORE_INVALID; - transition_count[CORE_INVALID]++; - } - transition_count[CORE_START]++; - break; - case CORE_S1: - if (ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_INT; - transition_count[CORE_S1]++; - } - else if (NEXT_SYMBOL == '.') - { - state = CORE_FLOAT; - transition_count[CORE_S1]++; - } - else - { - state = CORE_INVALID; - transition_count[CORE_S1]++; - } - break; - case CORE_INT: - if (NEXT_SYMBOL == '.') - { - state = CORE_FLOAT; - transition_count[CORE_INT]++; - } - else if (!ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_INVALID; - transition_count[CORE_INT]++; - } - break; - case CORE_FLOAT: - if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e') - { - state = CORE_S2; - transition_count[CORE_FLOAT]++; - } - else if (!ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_INVALID; - transition_count[CORE_FLOAT]++; - } - break; - case CORE_S2: - if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-') - { - state = CORE_EXPONENT; - transition_count[CORE_S2]++; - } - else - { - state = CORE_INVALID; - transition_count[CORE_S2]++; - } - break; - case CORE_EXPONENT: - if (ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_SCIENTIFIC; - transition_count[CORE_EXPONENT]++; - } - else - { - state = CORE_INVALID; - transition_count[CORE_EXPONENT]++; - } - break; - case CORE_SCIENTIFIC: - if (!ee_isdigit(NEXT_SYMBOL)) - { - state = CORE_INVALID; - transition_count[CORE_INVALID]++; - } - break; - default: - break; - } - } - *instr 
= str; - return state; -} diff --git a/benchmarks/riscv-coremark/coremark/core_util.c b/benchmarks/riscv-coremark/coremark/core_util.c deleted file mode 100644 index 67c5d7757..000000000 --- a/benchmarks/riscv-coremark/coremark/core_util.c +++ /dev/null @@ -1,249 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include "coremark.h" -/* Function: get_seed - Get a values that cannot be determined at compile time. - - Since different embedded systems and compilers are used, 3 different - methods are provided: 1 - Using a volatile variable. This method is only - valid if the compiler is forced to generate code that reads the value of a - volatile variable from memory at run time. Please note, if using this method, - you would need to modify core_portme.c to generate training profile. 2 - - Command line arguments. This is the preferred method if command line - arguments are supported. 3 - System function. If none of the first 2 methods - is available on the platform, a system function which is not a stub can be - used. - - e.g. read the value on GPIO pins connected to switches, or invoke - special simulator functions. 
-*/ -#if (SEED_METHOD == SEED_VOLATILE) -extern volatile ee_s32 seed1_volatile; -extern volatile ee_s32 seed2_volatile; -extern volatile ee_s32 seed3_volatile; -extern volatile ee_s32 seed4_volatile; -extern volatile ee_s32 seed5_volatile; -ee_s32 -get_seed_32(int i) -{ - ee_s32 retval; - switch (i) - { - case 1: - retval = seed1_volatile; - break; - case 2: - retval = seed2_volatile; - break; - case 3: - retval = seed3_volatile; - break; - case 4: - retval = seed4_volatile; - break; - case 5: - retval = seed5_volatile; - break; - default: - retval = 0; - break; - } - return retval; -} -#elif (SEED_METHOD == SEED_ARG) -ee_s32 -parseval(char *valstring) -{ - ee_s32 retval = 0; - ee_s32 neg = 1; - int hexmode = 0; - if (*valstring == '-') - { - neg = -1; - valstring++; - } - if ((valstring[0] == '0') && (valstring[1] == 'x')) - { - hexmode = 1; - valstring += 2; - } - /* first look for digits */ - if (hexmode) - { - while (((*valstring >= '0') && (*valstring <= '9')) - || ((*valstring >= 'a') && (*valstring <= 'f'))) - { - ee_s32 digit = *valstring - '0'; - if (digit > 9) - digit = 10 + *valstring - 'a'; - retval *= 16; - retval += digit; - valstring++; - } - } - else - { - while ((*valstring >= '0') && (*valstring <= '9')) - { - ee_s32 digit = *valstring - '0'; - retval *= 10; - retval += digit; - valstring++; - } - } - /* now add qualifiers */ - if (*valstring == 'K') - retval *= 1024; - if (*valstring == 'M') - retval *= 1024 * 1024; - - retval *= neg; - return retval; -} - -ee_s32 -get_seed_args(int i, int argc, char *argv[]) -{ - if (argc > i) - return parseval(argv[i]); - return 0; -} - -#elif (SEED_METHOD == SEED_FUNC) -/* If using OS based function, you must define and implement the functions below - * in core_portme.h and core_portme.c ! 
*/ -ee_s32 -get_seed_32(int i) -{ - ee_s32 retval; - switch (i) - { - case 1: - retval = portme_sys1(); - break; - case 2: - retval = portme_sys2(); - break; - case 3: - retval = portme_sys3(); - break; - case 4: - retval = portme_sys4(); - break; - case 5: - retval = portme_sys5(); - break; - default: - retval = 0; - break; - } - return retval; -} -#endif - -/* Function: crc* - Service functions to calculate 16b CRC code. - -*/ -ee_u16 -crcu8(ee_u8 data, ee_u16 crc) -{ - ee_u8 i = 0, x16 = 0, carry = 0; - - for (i = 0; i < 8; i++) - { - x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); - data >>= 1; - - if (x16 == 1) - { - crc ^= 0x4002; - carry = 1; - } - else - carry = 0; - crc >>= 1; - if (carry) - crc |= 0x8000; - else - crc &= 0x7fff; - } - return crc; -} -ee_u16 -crcu16(ee_u16 newval, ee_u16 crc) -{ - crc = crcu8((ee_u8)(newval), crc); - crc = crcu8((ee_u8)((newval) >> 8), crc); - return crc; -} -ee_u16 -crcu32(ee_u32 newval, ee_u16 crc) -{ - crc = crc16((ee_s16)newval, crc); - crc = crc16((ee_s16)(newval >> 16), crc); - return crc; -} -ee_u16 -crc16(ee_s16 newval, ee_u16 crc) -{ - return crcu16((ee_u16)newval, crc); -} - -ee_u8 -check_data_types() -{ - ee_u8 retval = 0; - if (sizeof(ee_u8) != 1) - { - ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); - retval++; - } - if (sizeof(ee_u16) != 2) - { - ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); - retval++; - } - if (sizeof(ee_s16) != 2) - { - ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); - retval++; - } - if (sizeof(ee_s32) != 4) - { - ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); - retval++; - } - if (sizeof(ee_u32) != 4) - { - ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); - retval++; - } - if (sizeof(ee_ptr_int) != sizeof(int *)) - { - ee_printf( - "ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); - retval++; - } - if (retval > 0) - { - ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); - } - return retval; -} diff --git 
a/benchmarks/riscv-coremark/coremark/coremark.exe b/benchmarks/riscv-coremark/coremark/coremark.exe deleted file mode 100755 index 75b8e8468..000000000 Binary files a/benchmarks/riscv-coremark/coremark/coremark.exe and /dev/null differ diff --git a/benchmarks/riscv-coremark/coremark/coremark.h b/benchmarks/riscv-coremark/coremark/coremark.h deleted file mode 100644 index 9c5e4060a..000000000 --- a/benchmarks/riscv-coremark/coremark/coremark.h +++ /dev/null @@ -1,183 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains declarations of the various benchmark functions. -*/ - -/* Configuration: TOTAL_DATA_SIZE - Define total size for data algorithms will operate on -*/ -#ifndef TOTAL_DATA_SIZE -#define TOTAL_DATA_SIZE 2 * 1000 -#endif - -#define SEED_ARG 0 -#define SEED_FUNC 1 -#define SEED_VOLATILE 2 - -#define MEM_STATIC 0 -#define MEM_MALLOC 1 -#define MEM_STACK 2 - -#include "core_portme.h" - -#if HAS_STDIO -#include -#endif -#if HAS_PRINTF -#define ee_printf printf -#endif - -/* Actual benchmark execution in iterate */ -void *iterate(void *pres); - -/* Typedef: secs_ret - For machines that have floating point support, get number of seconds as - a double. Otherwise an unsigned int. 
-*/ -#if HAS_FLOAT -typedef double secs_ret; -#else -typedef ee_u32 secs_ret; -#endif - -#if MAIN_HAS_NORETURN -#define MAIN_RETURN_VAL -#define MAIN_RETURN_TYPE void -#else -#define MAIN_RETURN_VAL 0 -#define MAIN_RETURN_TYPE int -#endif - -void start_time(void); -void stop_time(void); -CORE_TICKS get_time(void); -secs_ret time_in_secs(CORE_TICKS ticks); - -/* Misc useful functions */ -ee_u16 crcu8(ee_u8 data, ee_u16 crc); -ee_u16 crc16(ee_s16 newval, ee_u16 crc); -ee_u16 crcu16(ee_u16 newval, ee_u16 crc); -ee_u16 crcu32(ee_u32 newval, ee_u16 crc); -ee_u8 check_data_types(void); -void * portable_malloc(ee_size_t size); -void portable_free(void *p); -ee_s32 parseval(char *valstring); - -/* Algorithm IDS */ -#define ID_LIST (1 << 0) -#define ID_MATRIX (1 << 1) -#define ID_STATE (1 << 2) -#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE) -#define NUM_ALGORITHMS 3 - -/* list data structures */ -typedef struct list_data_s -{ - ee_s16 data16; - ee_s16 idx; -} list_data; - -typedef struct list_head_s -{ - struct list_head_s *next; - struct list_data_s *info; -} list_head; - -/*matrix benchmark related stuff */ -#define MATDAT_INT 1 -#if MATDAT_INT -typedef ee_s16 MATDAT; -typedef ee_s32 MATRES; -#else -typedef ee_f16 MATDAT; -typedef ee_f32 MATRES; -#endif - -typedef struct MAT_PARAMS_S -{ - int N; - MATDAT *A; - MATDAT *B; - MATRES *C; -} mat_params; - -/* state machine related stuff */ -/* List of all the possible states for the FSM */ -typedef enum CORE_STATE -{ - CORE_START = 0, - CORE_INVALID, - CORE_S1, - CORE_S2, - CORE_INT, - CORE_FLOAT, - CORE_EXPONENT, - CORE_SCIENTIFIC, - NUM_CORE_STATES -} core_state_e; - -/* Helper structure to hold results */ -typedef struct RESULTS_S -{ - /* inputs */ - ee_s16 seed1; /* Initializing seed */ - ee_s16 seed2; /* Initializing seed */ - ee_s16 seed3; /* Initializing seed */ - void * memblock[4]; /* Pointer to safe memory location */ - ee_u32 size; /* Size of the data */ - ee_u32 iterations; /* Number of iterations to 
execute */ - ee_u32 execs; /* Bitmask of operations to execute */ - struct list_head_s *list; - mat_params mat; - /* outputs */ - ee_u16 crc; - ee_u16 crclist; - ee_u16 crcmatrix; - ee_u16 crcstate; - ee_s16 err; - /* ultithread specific */ - core_portable port; -} core_results; - -/* Multicore execution handling */ -#if (MULTITHREAD > 1) -ee_u8 core_start_parallel(core_results *res); -ee_u8 core_stop_parallel(core_results *res); -#endif - -/* list benchmark functions */ -list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); -ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); - -/* state benchmark functions */ -void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); -ee_u16 core_bench_state(ee_u32 blksize, - ee_u8 *memblock, - ee_s16 seed1, - ee_s16 seed2, - ee_s16 step, - ee_u16 crc); - -/* matrix benchmark functions */ -ee_u32 core_init_matrix(ee_u32 blksize, - void * memblk, - ee_s32 seed, - mat_params *p); -ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); diff --git a/benchmarks/riscv-coremark/coremark/coremark.md5 b/benchmarks/riscv-coremark/coremark/coremark.md5 deleted file mode 100644 index 94160db22..000000000 --- a/benchmarks/riscv-coremark/coremark/coremark.md5 +++ /dev/null @@ -1,6 +0,0 @@ -8d082dc4a9676c02731a8cf209339072 core_list_join.c -c984863b84b59185d8b5fb81c1ca7535 core_main.c -5fa21a0f7c3964167c9691db531ca652 core_matrix.c -edcfc7a0b146a50028014f06e6826aa3 core_state.c -45540ba2145adea1ec7ea2c72a1fbbcb core_util.c -8ca974c013b380dc7f0d6d1afb76eb2d coremark.h diff --git a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.c b/benchmarks/riscv-coremark/coremark/cygwin/core_portme.c deleted file mode 100755 index fe8d29983..000000000 --- a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.c +++ /dev/null @@ -1,336 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file 
except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. -*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. 
- - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. - */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.h b/benchmarks/riscv-coremark/coremark/cygwin/core_portme.h deleted file mode 100755 index 9471b12ec..000000000 --- a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.h +++ /dev/null @@ -1,293 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" 
-#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -/* align_mem: - This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). 
-*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. 
- - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.mak b/benchmarks/riscv-coremark/coremark/cygwin/core_portme.mak deleted file mode 100644 index 97b6d6ace..000000000 --- a/benchmarks/riscv-coremark/coremark/cygwin/core_portme.mak +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/coremark/docs/READM.md b/benchmarks/riscv-coremark/coremark/docs/READM.md deleted file mode 100644 index 6f71f426d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/READM.md +++ /dev/null @@ -1 +0,0 @@ -This folder contains the original, unaltered documents from the CoreMark V1.0 release. diff --git a/benchmarks/riscv-coremark/coremark/docs/balance_O0_joined.png b/benchmarks/riscv-coremark/coremark/docs/balance_O0_joined.png deleted file mode 100644 index 46b41583a..000000000 Binary files a/benchmarks/riscv-coremark/coremark/docs/balance_O0_joined.png and /dev/null differ diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/PIC32/core_portme-mak.html b/benchmarks/riscv-coremark/coremark/docs/html/files/PIC32/core_portme-mak.html deleted file mode 100644 index c222bac2d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/PIC32/core_portme-mak.html +++ /dev/null @@ -1,68 +0,0 @@ - - -core_portme.mak - CoreMark - - - - - - - -

core_portme.mak

Summary
core_portme.mak
Variables
OUTFLAGUse this flag to define how to get an executable (e.g -o)
CFLAGSUse this flag to define compiler options.
LFLAGS_ENDDefine any libraries needed for linking or other flags that should come at the end of the link line (e.g.
SEPARATE_COMPILEDefine if you need to separate compilation from link stage.
PORT_OBJSPort specific object files can be added here
Build Targets
port_prebuildGenerate any files that are needed before actual build starts.
port_postbuildGenerate any files that are needed after actual build end.
port_postrunDo platform specific after run stuff.
port_prerunDo platform specific before run stuff.
port_postloadDo platform specific after load stuff.
port_preloadDo platform specific before load stuff.
Variables
OPATH
PERLDefine perl executable to calculate the geomean if running separate.
- -

Variables

- -

OUTFLAG

Use this flag to define how to get an executable (e.g -o)

- -

CFLAGS

Use this flag to define compiler options.  Note, you can add compiler options from the command line using XCFLAGS=”other flags”

- -

LFLAGS_END

Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).  Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.

- -

SEPARATE_COMPILE

Define if you need to separate compilation from link stage.  In this case, you also need to define below how to create an object file, and how to link.

- -

PORT_OBJS

Port specific object files can be added here

- -

Build Targets

- -

port_prebuild

Generate any files that are needed before actual build starts.  E.g. generate profile guidance files.  Sample PGO generation for gcc enabled with PGO=1

  • First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
  • Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
NoteUsing REBUILD=1

Use make PGO=1 to invoke this sample processing.

- -

port_postbuild

Generate any files that are needed after actual build end.  E.g. change format to srec, bin, zip in order to be able to load into flash

- -

port_postrun

Do platform specific after run stuff.  E.g. reset the board, backup the logfiles etc.

- -

port_prerun

Do platform specific before run stuff.  E.g. reset the board, backup the logfiles etc.

- -

port_postload

Do platform specific after load stuff.  E.g. reset the reset power to the flash eraser

- -

port_preload

Do platform specific before load stuff.  E.g. reset the reset power to the flash eraser

- -

Variables

- -

OPATH

Path to the output folder.  Default: current folder.
- -

PERL

Define perl executable to calculate the geomean if running separate.

- -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/core_list_join-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/core_list_join-c.html deleted file mode 100644 index 6ee2aeecd..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/core_list_join-c.html +++ /dev/null @@ -1,58 +0,0 @@ - - -/cygdrive/d/dev/code/coremark/core_list_join.c - CoreMark - - - - - - - -

core_list_join.c

Summary
core_list_join.c
DescriptionBenchmark using a linked list.
Functions
cmp_complexCompare the data item in a list cell.
cmp_idxCompare the idx item in a list cell, and regen the data.
core_list_initInitialize list with data.
core_list_insertInsert an item to the list
core_list_removeRemove an item from the list.
core_list_undo_removeUndo a remove operation.
core_list_findFind an item in the list
core_list_reverseReverse a list
core_list_mergesortSort the list in place without recursion.
- -

Description

Benchmark using a linked list.

Linked list is a common data structure used in many applications.

For our purposes, this will exercise the memory units of the processor.  In particular, usage of the list pointers to find and alter data.

We are not using Malloc since some platforms do not support this library.

Instead, the memory block being passed in is used to create a list, and the benchmark takes care not to add more items than can be accommodated by the memory block.  The porting layer will make sure that we have a valid memory block.

All operations are done in place, without using any extra memory.

The list itself contains list pointers and pointers to data items.  Data items contain the following:

idxAn index that captures the initial order of the list.
dataVariable data initialized based on the input parameters.  The 16b are divided as follows:
  • Upper 8b are backup of original data.
  • Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
  • Bits 0-2 indicate type of operation to perform to get a 7b value.
  • Bits 3-6 provide input for the operation.
- -

Functions

- -

cmp_complex

ee_s32 cmp_complex(list_data *a,
list_data *b,
core_results *res)

Compare the data item in a list cell.

Can be used by mergesort.

- -

cmp_idx

ee_s32 cmp_idx(list_data *a,
list_data *b,
core_results *res)

Compare the idx item in a list cell, and regen the data.

Can be used by mergesort.

- -

core_list_init

list_head *core_list_init(ee_u32 blksize,
list_head *memblock,
ee_s16 seed)

Initialize list with data.

Parameters

blksizeSize of memory to be initialized.
memblockPointer to memory block.
seedActual values chosen depend on the seed parameter.  The seed parameter MUST be supplied from a source that cannot be determined at compile time

Returns

Pointer to the head of the list.

- -

core_list_insert

list_head *core_list_insert_new(list_head *insert_point,
list_data *info,
list_head **memblock,
list_data **datablock ,
list_head *memblock_end,
list_data *datablock_end)

Insert an item to the list

Parameters

insert_pointwhere to insert the item.
infodata for the cell.
memblockpointer for the list header
datablockpointer for the list data
memblock_endend of region for list headers
datablock_endend of region for list data

Returns

Pointer to new item.

- -

core_list_remove

list_head *core_list_remove(list_head *item)

Remove an item from the list.

Operation

For a singly linked list, remove by copying the data from the next item over to the current cell, and unlinking the next item.

Note

since there is always a fake item at the end of the list, no need to check for NULL.

Returns

Removed item.

- -

core_list_undo_remove

list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified)

Undo a remove operation.

Operation

Since we want each iteration of the benchmark to be exactly the same, we need to be able to undo a remove.  Link the removed item back into the list, and switch the info items.

Parameters

item_removedReturn value from the core_list_remove
item_modifiedList item that was modified during core_list_remove

Returns

The item that was linked back to the list.

- -

core_list_find

list_head *core_list_find(list_head *list,
list_data *info)

Find an item in the list

Operation

Find an item by idx (if not 0) or specific data value

Parameters

listlist head
infoidx or data to find

Returns

Found item, or NULL if not found.

- -

core_list_reverse

list_head *core_list_reverse(list_head *list)

Reverse a list

Operation

Rearrange the pointers so the list is reversed.

Parameters

listlist head
infoidx or data to find

Returns

Found item, or NULL if not found.

- -

core_list_mergesort

list_head *core_list_mergesort(list_head *list,
list_cmp cmp,
core_results *res)

Sort the list in place without recursion.

Description

Use mergesort, as for linked list this is a realistic solution.  Also, since this is aimed at embedded, care was taken to use iterative rather than recursive algorithm.  The sort can either return the list to original order (by idx) , or use the data item to invoke other algorithms and change the order of the list.

Parameters

listlist to be sorted.
cmpcmp function to use

Returns

New head of the list.

Note

We have a special header for the list that will always be first, but the algorithm could theoretically modify where the list starts.

- -
- - - - - - - - - - -
ee_s32 cmp_complex(list_data *a,
list_data *b,
core_results *res)
Compare the data item in a list cell.
ee_s32 cmp_idx(list_data *a,
list_data *b,
core_results *res)
Compare the idx item in a list cell, and regen the data.
list_head *core_list_init(ee_u32 blksize,
list_head *memblock,
ee_s16 seed)
Initialize list with data.
list_head *core_list_insert_new(list_head *insert_point,
list_data *info,
list_head **memblock,
list_data **datablock ,
list_head *memblock_end,
list_data *datablock_end)
Insert an item to the list
list_head *core_list_remove(list_head *item)
Remove an item from the list.
list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified)
Undo a remove operation.
list_head *core_list_find(list_head *list,
list_data *info)
Find an item in the list
list_head *core_list_reverse(list_head *list)
Reverse a list
list_head *core_list_mergesort(list_head *list,
list_cmp cmp,
core_results *res)
Sort the list in place without recursion.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/core_main-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/core_main-c.html deleted file mode 100644 index 847744131..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/core_main-c.html +++ /dev/null @@ -1,42 +0,0 @@ - - -core_main.c - CoreMark - - - - - - - -

core_main.c

This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.

Summary
core_main.cThis file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
Functions
iterateRun the benchmark for a specified number of iterations.
mainMain entry routine for the benchmark.
- -

Functions

- -

iterate

Run the benchmark for a specified number of iterations.

Operation

For each type of benchmarked algorithm: a - Initialize the data block for the algorithm. b - Execute the algorithm N times.

Returns

NULL.

- -

main

#if MAIN_HAS_NOARGC MAIN_RETURN_TYPE main(void)

Main entry routine for the benchmark.  This function is responsible for the following steps:

1Initialize input seeds from a source that cannot be determined at compile time.
2Initialize memory block for use.
3Run and time the benchmark.
4Report results, testing the validity of the output if the seeds are known.

Arguments

1first seed : Any value
2second seed : Must be identical to first for iterations to be identical
3third seed : Any value, should be at least an order of magnitude less than the input size, but bigger than 32.
4Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
- -
- - - - - - - - - - -
#if MAIN_HAS_NOARGC MAIN_RETURN_TYPE main(void)
Main entry routine for the benchmark.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/core_matrix-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/core_matrix-c.html deleted file mode 100644 index 2ad041b71..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/core_matrix-c.html +++ /dev/null @@ -1,56 +0,0 @@ - - -/cygdrive/d/dev/code/coremark/core_matrix.c - CoreMark - - - - - - - -

core_matrix.c

Summary
core_matrix.c
DescriptionMatrix manipulation benchmark
Functions
core_bench_matrixBenchmark function
matrix_testPerform matrix manipulation.
matrix_sumCalculate a function that depends on the values of elements in the matrix.
matrix_mul_constMultiply a matrix by a constant.
matrix_add_constAdd a constant value to all elements of a matrix.
matrix_mul_vectMultiply a matrix by a vector.
matrix_mul_matrixMultiply a matrix by a matrix.
matrix_mul_matrix_bitextractMultiply a matrix by a matrix, and extract some bits from the result.
- -

Description

Matrix manipulation benchmark

This very simple algorithm forms the basis of many more complex algorithms.

The tight inner loop is the focus of many optimizations (compiler as well as hardware based) and is thus relevant for embedded processing.

The total available data space will be divided to 3 parts

NxN Matrix Ainitialized with small values (upper 3/4 of the bits all zero).
NxN Matrix Binitialized with medium values (upper half of the bits all zero).
NxN Matrix Cused for the result.

The actual values for A and B must be derived based on input that is not available at compile time.

- -

Functions

- -

core_bench_matrix

ee_u16 core_bench_matrix(mat_params *p,
ee_s16 seed,
ee_u16 crc)

Benchmark function

Iterate matrix_test N times, changing the matrix values slightly by a constant amount each time.

- -

matrix_test

ee_s16 matrix_test(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B,
MATDAT val)

Perform matrix manipulation.

Parameters

NDimensions of the matrix.
Cmemory for result matrix.
Ainput matrix
Boperator matrix (not changed during operations)

Returns

A CRC value that captures all results calculated in the function.  In particular, crc of the value calculated on the result matrix after each step by matrix_sum.

Operation

1Add a constant value to all elements of a matrix.
2Multiply a matrix by a constant.
3Multiply a matrix by a vector.
4Multiply a matrix by a matrix.
5Add a constant value to all elements of a matrix.

After the last step, matrix A is back to original contents.

- -

matrix_sum

ee_s16 matrix_sum(ee_u32 N,
MATRES *C,
MATDAT clipval)

Calculate a function that depends on the values of elements in the matrix.

For each element, accumulate into a temporary variable.

As long as this value is under the parameter clipval, add 1 to the result if the element is bigger than the previous.

Otherwise, reset the accumulator and add 10 to the result.

- -

matrix_mul_const

void matrix_mul_const(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT val)

Multiply a matrix by a constant.  This could be used as a scaler for instance.

- -

matrix_add_const

void matrix_add_const(ee_u32 N,
MATDAT *A,
MATDAT val)

Add a constant value to all elements of a matrix.

- -

matrix_mul_vect

void matrix_mul_vect(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)

Multiply a matrix by a vector.  This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.)

- -

matrix_mul_matrix

void matrix_mul_matrix(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)

Multiply a matrix by a matrix.  Basic code is used in many algorithms, mostly with minor changes such as scaling.

- -

matrix_mul_matrix_bitextract

void matrix_mul_matrix_bitextract(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)

Multiply a matrix by a matrix, and extract some bits from the result.  Basic code is used in many algorithms, mostly with minor changes such as scaling.

- -
- - - - - - - - - - -
ee_u16 core_bench_matrix(mat_params *p,
ee_s16 seed,
ee_u16 crc)
Benchmark function
ee_s16 matrix_test(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B,
MATDAT val)
Perform matrix manipulation.
ee_s16 matrix_sum(ee_u32 N,
MATRES *C,
MATDAT clipval)
Calculate a function that depends on the values of elements in the matrix.
void matrix_mul_const(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT val)
Multiply a matrix by a constant.
void matrix_add_const(ee_u32 N,
MATDAT *A,
MATDAT val)
Add a constant value to all elements of a matrix.
void matrix_mul_vect(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a vector.
void matrix_mul_matrix(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix.
void matrix_mul_matrix_bitextract(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix, and extract some bits from the result.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/core_state-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/core_state-c.html deleted file mode 100644 index 9f8035990..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/core_state-c.html +++ /dev/null @@ -1,46 +0,0 @@ - - -/cygdrive/d/dev/code/coremark/core_state.c - CoreMark - - - - - - - -

core_state.c

Summary
core_state.c
DescriptionSimple state machines like this one are used in many embedded products.
Functions
core_bench_stateBenchmark function
core_init_stateInitialize the input data for the state machine.
core_state_transitionActual state machine.
- -

Description

Simple state machines like this one are used in many embedded products.

For more complex state machines, sometimes a state transition table implementation is used instead, trading speed of direct coding for ease of maintenance.

Since the main goal of using a state machine in CoreMark is to exercise the switch/if behaviour, we are using a small Moore machine.

In particular, this machine tests type of string input, trying to determine whether the input is a number or something else.  (see core_state).

core_state
- -

Functions

- -

core_bench_state

ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc)

Benchmark function

Go over the input twice, once direct, and once after introducing some corruption.

- -

core_init_state

void core_init_state(ee_u32 size,
ee_s16 seed,
ee_u8 *p)

Initialize the input data for the state machine.

Populate the input with several predetermined strings, interspersed.  Actual patterns chosen depend on the seed parameter.

Note

The seed parameter MUST be supplied from a source that cannot be determined at compile time

- -

core_state_transition

enum CORE_STATE core_state_transition(ee_u8 **instr ,
ee_u32 *transition_count)

Actual state machine.

The state machine will continue scanning until either

1. an invalid input is detected.
2. a valid number has been detected.

The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).

- -
- - - - - - - - - - -
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc)
Benchmark function
void core_init_state(ee_u32 size,
ee_s16 seed,
ee_u8 *p)
Initialize the input data for the state machine.
enum CORE_STATE core_state_transition(ee_u8 **instr ,
ee_u32 *transition_count)
Actual state machine.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/core_util-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/core_util-c.html deleted file mode 100644 index 3ebdb3879..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/core_util-c.html +++ /dev/null @@ -1,42 +0,0 @@ - - -/cygdrive/d/dev/code/coremark/core_util.c - CoreMark - - - - - - - -

core_util.c

Summary
core_util.c
Functions
get_seedGet a value that cannot be determined at compile time.
crc*Service functions to calculate 16b CRC code.
- -

Functions

- -

get_seed

Get a value that cannot be determined at compile time.

Since different embedded systems and compilers are used, 3 different methods are provided

1Using a volatile variable.  This method is only valid if the compiler is forced to generate code that reads the value of a volatile variable from memory at run time.  Please note, if using this method, you would need to modify core_portme.c to generate training profile.
2Command line arguments.  This is the preferred method if command line arguments are supported.
3System function.  If none of the first 2 methods is available on the platform, a system function which is not a stub can be used.

e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions.

- -

crc*

Service functions to calculate 16b CRC code.

- -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/coremark-h.html b/benchmarks/riscv-coremark/coremark/docs/html/files/coremark-h.html deleted file mode 100644 index 337bc1a0c..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/coremark-h.html +++ /dev/null @@ -1,46 +0,0 @@ - - -/cygdrive/d/dev/code/coremark/coremark.h - CoreMark - - - - - - - -

coremark.h

Summary
coremark.h
DescriptionThis file contains declarations of the various benchmark functions.
Configuration
TOTAL_DATA_SIZEDefine total size for data algorithms will operate on
Types
secs_retFor machines that have floating point support, get number of seconds as a double.
- -

Description

This file contains declarations of the various benchmark functions.

- -

Configuration

- -

TOTAL_DATA_SIZE

Define total size for data algorithms will operate on

- -

Types

- -

secs_ret

For machines that have floating point support, get number of seconds as a double.  Otherwise an unsigned int.

- -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/docs/core_state.png b/benchmarks/riscv-coremark/coremark/docs/html/files/docs/core_state.png deleted file mode 100644 index 9b5a4ea60..000000000 Binary files a/benchmarks/riscv-coremark/coremark/docs/html/files/docs/core_state.png and /dev/null differ diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-c.html b/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-c.html deleted file mode 100644 index c8fd8124d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-c.html +++ /dev/null @@ -1,58 +0,0 @@ - - -core_portme.c - CoreMark - - - - - - - -

core_portme.c

Summary
core_portme.c
portable_mallocProvide malloc() functionality in a platform specific way.
portable_freeProvide free() functionality in a platform specific way.
TIMER_RES_DIVIDERDivider to trade off timer resolution and total time that can be measured.
start_timeThis function will be called right before starting the timed portion of the benchmark.
stop_timeThis function will be called right after ending the timed portion of the benchmark.
get_timeReturn an abstract “ticks” number that signifies time on the system.
time_in_secsConvert the value returned by get_time to seconds.
portable_initTarget specific initialization code Test for some common mistakes.
portable_finiTarget specific final code
core_start_parallelStart benchmarking in a parallel context.
core_stop_parallelStop a parallel context execution of coremark, and gather the results.
- -

portable_malloc

void *portable_malloc(size_t size)

Provide malloc() functionality in a platform specific way.

- -

portable_free

void portable_free(void *p)

Provide free() functionality in a platform specific way.

- -

TIMER_RES_DIVIDER

Divider to trade off timer resolution and total time that can be measured.

Use lower values to increase resolution, but make sure that overflow does not occur.  If there are issues with the return value overflowing, increase this value.

- -

start_time

void start_time(void)

This function will be called right before starting the timed portion of the benchmark.

Implementation may be capturing a system timer (as implemented in the example code) or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.

- -

stop_time

void stop_time(void)

This function will be called right after ending the timed portion of the benchmark.

Implementation may be capturing a system timer (as implemented in the example code) or other system parameters - e.g. reading the current value of cpu cycles counter.

- -

get_time

CORE_TICKS get_time(void)

Return an abstract “ticks” number that signifies time on the system.

Actual value returned may be cpu cycles, milliseconds or any other value, as long as it can be converted to seconds by time_in_secs.  This methodology is taken to accommodate any hardware or simulated platform.  The sample implementation returns milliseconds by default, and the resolution is controlled by TIMER_RES_DIVIDER.

- -

time_in_secs

secs_ret time_in_secs(CORE_TICKS ticks)

Convert the value returned by get_time to seconds.

The secs_ret type is used to accommodate systems with no support for floating point.  The default implementation is provided by the EE_TICKS_PER_SEC macro above.

- -

portable_init

void portable_init(core_portable *p,
int *argc,
char *argv[])

Target specific initialization code Test for some common mistakes.

- -

portable_fini

void portable_fini(core_portable *p)

Target specific final code

- -

core_start_parallel

Start benchmarking in a parallel context.

Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.  Other implementations using MCAPI or other standards can easily be devised.

- -

core_stop_parallel

Stop a parallel context execution of coremark, and gather the results.

Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.  Other implementations using MCAPI or other standards can easily be devised.

- -
- - - - - - - - - - -
void *portable_malloc(size_t size)
Provide malloc() functionality in a platform specific way.
void portable_free(void *p)
Provide free() functionality in a platform specific way.
void start_time(void)
This function will be called right before starting the timed portion of the benchmark.
void stop_time(void)
This function will be called right after ending the timed portion of the benchmark.
CORE_TICKS get_time(void)
Return an abstract “ticks” number that signifies time on the system.
secs_ret time_in_secs(CORE_TICKS ticks)
Convert the value returned by get_time to seconds.
void portable_init(core_portable *p,
int *argc,
char *argv[])
Target specific initialization code Test for some common mistakes.
void portable_fini(core_portable *p)
Target specific final code
Divider to trade off timer resolution and total time that can be measured.
For machines that have floating point support, get number of seconds as a double.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-h.html b/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-h.html deleted file mode 100644 index 90810f13d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-h.html +++ /dev/null @@ -1,72 +0,0 @@ - - -core_portme.h - CoreMark - - - - - - - -

core_portme.h

Summary
core_portme.h
DescriptionThis file contains configuration constants required to execute on different platforms
Configuration
HAS_FLOATDefine to 1 if the platform supports floating point.
HAS_TIME_HDefine to 1 if platform has the time.h header file, and implementation of functions thereof.
USE_CLOCKDefine to 1 if platform has the time.h header file, and implementation of functions thereof.
HAS_STDIODefine to 1 if the platform has stdio.h.
HAS_PRINTFDefine to 1 if the platform has stdio.h and implements the printf function.
CORE_TICKSDefine type of return from the timing functions.
SEED_METHODDefines method to get seed values that cannot be computed at compile time.
MEM_METHODDefines method to get a block of memory.
MULTITHREADDefine for parallel execution
USE_PTHREADSample implementation for launching parallel contexts This implementation uses pthread_thread_create and pthread_join.
USE_FORKSample implementation for launching parallel contexts This implementation uses fork, waitpid, shmget,shmat and shmdt.
USE_SOCKETSample implementation for launching parallel contexts This implementation uses fork, socket, sendto and recvfrom
MAIN_HAS_NOARGCNeeded if platform does not support getting arguments to main.
MAIN_HAS_NORETURNNeeded if platform does not support returning a value from main.
Variables
default_num_contextsNumber of contexts to spawn in multicore context.
- -

Description

This file contains configuration constants required to execute on different platforms

- -

Configuration

- -

HAS_FLOAT

Define to 1 if the platform supports floating point.

- -

HAS_TIME_H

Define to 1 if platform has the time.h header file, and implementation of functions thereof.

- -

USE_CLOCK

Define to 1 if platform has the time.h header file, and implementation of functions thereof.

- -

HAS_STDIO

Define to 1 if the platform has stdio.h.

- -

HAS_PRINTF

Define to 1 if the platform has stdio.h and implements the printf function.

- -

CORE_TICKS

Define type of return from the timing functions.

- -

SEED_METHOD

Defines method to get seed values that cannot be computed at compile time.

Valid values

SEED_ARGfrom command line.
SEED_FUNCfrom a system function.
SEED_VOLATILEfrom volatile variables.
- -

MEM_METHOD

Defines method to get a block of memory.

Valid values

MEM_MALLOCfor platforms that implement malloc and have malloc.h.
MEM_STATICto use a static memory array.
MEM_STACKto allocate the data block on the stack (NYI).
- -

MULTITHREAD

Define for parallel execution

Valid values

1only one context (default).
N>1will execute N copies in parallel.

Note

If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.

Two sample implementations are provided.  Use USE_PTHREAD or USE_FORK to enable them.

It is valid to have a different implementation of core_start_parallel and <core_end_parallel> in core_portme.c, to fit a particular architecture.

- -

USE_PTHREAD

Sample implementation for launching parallel contexts This implementation uses pthread_thread_create and pthread_join.

Valid values

0Do not use pthreads API.
1Use pthreads API

Note

This flag only matters if MULTITHREAD has been defined to a value greater than 1.

- -

USE_FORK

Sample implementation for launching parallel contexts This implementation uses fork, waitpid, shmget,shmat and shmdt.

Valid values

0Do not use fork API.
1Use fork API

Note

This flag only matters if MULTITHREAD has been defined to a value greater than 1.

- -

USE_SOCKET

Sample implementation for launching parallel contexts This implementation uses fork, socket, sendto and recvfrom

Valid values

0Do not use fork and sockets API.
1Use fork and sockets API

Note

This flag only matters if MULTITHREAD has been defined to a value greater than 1.

- -

MAIN_HAS_NOARGC

Needed if platform does not support getting arguments to main.

Valid values

0argc/argv to main is supported
1argc/argv to main is not supported
- -

MAIN_HAS_NORETURN

Needed if platform does not support returning a value from main.

Valid values

0main returns an int, and return value will be 0.
1platform does not support returning a value from main
- -

Variables

- -

default_num_contexts

extern ee_u32 default_num_contexts

Number of contexts to spawn in multicore context.  Override this global value to change number of contexts used.

Note

This value may not be set higher than the MULTITHREAD define.

To experiment, you can set the MULTITHREAD define to the highest value expected, and use argc/argv in the portable_init to set this value from the command line.

- -
- - - - - - - - - - -
extern ee_u32 default_num_contexts
Number of contexts to spawn in multicore context.
Sample implementation for launching parallel contexts This implementation uses pthread_thread_create and pthread_join.
Sample implementation for launching parallel contexts This implementation uses fork, waitpid, shmget,shmat and shmdt.
Start benchmarking in a parallel context.
Define for parallel execution
void portable_init(core_portable *p,
int *argc,
char *argv[])
Target specific initialization code Test for some common mistakes.
- - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-mak.html b/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-mak.html deleted file mode 100644 index ffd6cbe66..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/linux/core_portme-mak.html +++ /dev/null @@ -1,76 +0,0 @@ - - -core_portme.mak - CoreMark - - - - - - - -

core_portme.mak

Summary
core_portme.mak
Variables
OUTFLAGUse this flag to define how to get an executable (e.g. -o)
CCUse this flag to define compiler to use
CFLAGSUse this flag to define compiler options.
LFLAGS_ENDDefine any libraries needed for linking or other flags that should come at the end of the link line (e.g.
PORT_SRCSPort specific source files can be added here
LOADDefine this flag if you need to load to a target, as in a cross compile environment.
RUNDefine this flag if running does not consist of simple invocation of the binary.
SEPARATE_COMPILEDefine if you need to separate compilation from link stage.
PORT_OBJSPort specific object files can be added here
Build Targets
port_prebuildGenerate any files that are needed before actual build starts.
port_postbuildGenerate any files that are needed after actual build end.
port_postrunDo platform specific after run stuff.
port_prerunDo platform specific before run stuff.
port_postloadDo platform specific after load stuff.
port_preloadDo platform specific before load stuff.
Variables
OPATH
PERLDefine perl executable to calculate the geomean if running separate.
- -

Variables

- -

OUTFLAG

Use this flag to define how to get an executable (e.g. -o)

- -

CC

Use this flag to define compiler to use

- -

CFLAGS

Use this flag to define compiler options.  Note, you can add compiler options from the command line using XCFLAGS=”other flags”

- -

LFLAGS_END

Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).  Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.

- -

PORT_SRCS

Port specific source files can be added here

- -

LOAD

Define this flag if you need to load to a target, as in a cross compile environment.

- -

RUN

Define this flag if running does not consist of simple invocation of the binary.  In a cross compile environment, you need to define this.

- -

SEPARATE_COMPILE

Define if you need to separate compilation from link stage.  In this case, you also need to define below how to create an object file, and how to link.

- -

PORT_OBJS

Port specific object files can be added here

- -

Build Targets

- -

port_prebuild

Generate any files that are needed before actual build starts.  E.g. generate profile guidance files.  Sample PGO generation for gcc enabled with PGO=1

  • First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
  • Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
NoteUsing REBUILD=1

Use make PGO=1 to invoke this sample processing.

- -

port_postbuild

Generate any files that are needed after actual build end.  E.g. change format to srec, bin, zip in order to be able to load into flash

- -

port_postrun

Do platform specific after run stuff.  E.g. reset the board, backup the logfiles etc.

- -

port_prerun

Do platform specific before run stuff.  E.g. reset the board, backup the logfiles etc.

- -

port_postload

Do platform specific after load stuff.  E.g. reset the reset power to the flash eraser

- -

port_preload

Do platform specific before load stuff.  E.g. reset the reset power to the flash eraser

- -

Variables

- -

OPATH

Path to the output folder.  Default: current folder.
- -

PERL

Define perl executable to calculate the geomean if running separate.

- -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/readme-txt.html b/benchmarks/riscv-coremark/coremark/docs/html/files/readme-txt.html deleted file mode 100644 index 2b57f37f4..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/readme-txt.html +++ /dev/null @@ -1,71 +0,0 @@ - - -CoreMark - - - - - - - -

CoreMark

Summary
CoreMark
WelcomeCopyright © 2009 EEMBC All rights reserved.
Building and runningDownload the release files from the www.coremark.org.
DocumentationWhen you unpack the documentation (tar -vzxf coremark_<version>_docs.tgz) a docs folder will be created.
Submitting resultsCoreMark results can be submitted on the web.
Run rulesWhat is and is not allowed.
Reporting rulesHow to report results on a data sheet?
Log File FormatThe log files have the following format
LegalSee LICENSE.txt or the word document file under docs/LICENSE.doc.
CreditsMany thanks to all of the individuals who helped with the development or testing of CoreMark including (Sorted by company name)
- -

Welcome

Copyright © 2009 EEMBC All rights reserved.  CoreMark is a trademark of EEMBC and EEMBC is a registered trademark of the Embedded Microprocessor Benchmark Consortium.

CoreMark’s primary goals are simplicity and providing a method for testing only a processor’s core features.

For more information about EEMBC’s comprehensive embedded benchmark suites, please see www.eembc.org.

- -

Building and running

Download the release files from the www.coremark.org.  You can verify the download using the coremark_<version>.md5 file

md5sum -c coremark_<version>.md5

Unpack the distribution (tar -vzxf coremark_<version>.tgz && tar -vzxf coremark_<version>_docs.tgz) then change to the coremark_<version> folder.

To build and run the benchmark, type

make

Full results are available in the files run1.log and run2.log.  CoreMark result can be found in run1.log.

For self hosted Linux or Cygwin platforms, a simple make should work.

Cross Compile

For cross compile platforms please adjust core_portme.mak, core_portme.h (and possibly core_portme.c) according to the specific platform used.  When porting to a new platform, it is recommended to copy one of the default port folders (e.g. mkdir <platform> && cp linux/* <platform>), adjust the porting files, and run

make PORT_DIR=<platform>

Systems without make

The following files need to be compiled:

For example

gcc -O2 -o coremark.exe core_list_join.c core_main.c core_matrix.c core_state.c core_util.c simple/core_portme.c -DPERFORMANCE_RUN=1 -DITERATIONS=1000
-./coremark.exe > run1.log

The above will compile the benchmark for a performance run and 1000 iterations.  Output is redirected to run1.log.

Make targets

runDefault target, creates run1.log and run2.log.
run1.logRun the benchmark with performance parameters, and output to run1.log
run2.logRun the benchmark with validation parameters, and output to run2.log
run3.logRun the benchmark with profile generation parameters, and output to run3.log
compilecompile the benchmark executable
linklink the benchmark executable
checktest MD5 of sources that may not be modified
cleanclean temporary files

ITERATIONS

By default, the benchmark will run between 10-100 seconds.  To override, use ITERATIONS=N

make ITERATIONS=10

Will run the benchmark for 10 iterations.  It is recommended to set a specific number of iterations in certain situations e.g.:

  • Running with a simulator
  • Measuring power/energy
  • Timing cannot be restarted

Minimum required run time

Results are only valid for reporting if the benchmark ran for at least 10 secs!

XCFLAGS

To add compiler flags from the command line, use XCFLAGS e.g.

make XCFLAGS="-g -DMULTITHREAD=4 -DUSE_FORK=1"
  • CORE_DEBUG

Define to compile for a debug run if you get incorrect CRC.

make XCFLAGS="-DCORE_DEBUG=1"
  • Parallel Execution

Use XCFLAGS=-DMULTITHREAD=N where N is number of threads to run in parallel.  Several implementations are available to execute in multiple contexts, or you can implement your own in core_portme.c.

make XCFLAGS="-DMULTITHREAD=4 -DUSE_PTHREAD"

Above will compile the benchmark for execution on 4 cores, using POSIX Threads API.

REBUILD

To force rebuild, add the flag REBUILD to the command line

make REBUILD=1

Check core_portme.mak for more important options.

Run parameters for the benchmark executable

Coremark executable takes several parameters as follows (if main accepts arguments).  1st - A seed value used for initialization of data.  2nd - A seed value used for initialization of data.  3rd - A seed value used for initialization of data.  4th - Number of iterations (0 for auto : default value) 5th - Reserved for internal use.  6th - Reserved for internal use.  7th - For malloc users only, override the size of the input data buffer.

The run target from make will run coremark with 2 different data initialization seeds.

Alternative parameters

If not using malloc or command line arguments are not supported, the buffer size for the algorithms must be defined via the compiler define TOTAL_DATA_SIZE.  TOTAL_DATA_SIZE must be set to 2000 bytes (default) for standard runs.  The default for such a target when testing different configurations could be ...

make XCFLAGS="-DTOTAL_DATA_SIZE=6000 -DMAIN_HAS_NOARGC=1"
- -

Documentation

When you unpack the documentation (tar -vzxf coremark_<version>_docs.tgz) a docs folder will be created.  Check the file docs/html/index.html and the website http://www.coremark.org for more info.

- -

Submitting results

CoreMark results can be submitted on the web.

Open a web browser and go to http://www.coremark.org/benchmark/index.php?pg=benchmark and select the link to add a new score and follow the instructions.

- -

Run rules

What is and is not allowed.

Required

1The benchmark needs to run for at least 10 seconds.
2All validation must succeed for seeds 0,0,0x66 and 0x3415,0x3415,0x66, buffer size of 2000 bytes total.
  • If not using command line arguments to main:
make XCFLAGS="-DPERFORMANCE_RUN=1" REBUILD=1 run1.log
-make XCFLAGS="-DVALIDATION_RUN=1" REBUILD=1 run2.log
3If using profile guided optimization, profile must be generated using seeds of 8,8,8, and buffer size of 1200 bytes total.
make XCFLAGS="-DTOTAL_DATA_SIZE=1200 -DPROFILE_RUN=1" REBUILD=1 run3.log
4All source files must be compiled with the same flags.
5All data type sizes must match size in bits such that:
  • ee_u8 is an 8-bit datatype.
  • ee_s16 is a 16-bit datatype.
  • ee_u16 is a 16-bit datatype.
  • ee_s32 is a 32-bit datatype.
  • ee_u32 is a 32-bit datatype.

Allowed

  • Changing number of iterations
  • Changing toolchain and build/load/run options
  • Changing method of acquiring a data memory block
  • Changing the method of acquiring seed values
  • Changing implementation in core_portme.c
  • Changing configuration values in core_portme.h
  • Changing core_portme.mak

Not allowed

  • Changing of source file other than core_portme* (use make check to validate)
- -

Reporting rules

How to report results on a data sheet?

CoreMark 1.0 : N / C [/ P] [/ M]

NNumber of iterations per second (with seeds 0,0,0x66,size=2000)
CCompiler version and flags
PParameters such as data and code allocation specifics
  • This parameter may be omitted if all data was allocated on the heap in RAM.
  • This parameter may not be omitted when reporting CoreMark/MHz
MType of parallel execution (if used) and number of contexts This parameter may be omitted if parallel execution was not used.

e.g.

CoreMark 1.0 : 128 / GCC 4.1.2 -O2 -fprofile-use / Heap in TCRAM / FORK:2

or

CoreMark 1.0 : 1400 / GCC 3.4 -O4

If reporting scaling results, the results must be reported as follows

CoreMark/MHz 1.0 : N / C / P [/ M]

PWhen reporting scaling results, memory parameter must also indicate memory frequency:core frequency ratio.
  • If the core has cache and cache frequency to core frequency ratio is configurable, that must also be included.

e.g.

CoreMark/MHz 1.0 : 1.47 / GCC 4.1.2 -O2 / DDR3(Heap) 30:1 Memory 1:1 Cache
- -

Log File Format

The log files have the following format

2K performance run parameters for coremark. (Run type)
-CoreMark Size       : 666                   (Buffer size)
-Total ticks         : 25875                 (platform dependent value)
-Total time (secs)   : 25.875000             (actual time in seconds)
-Iterations/Sec      : 3864.734300           (Performance value to report)
-Iterations          : 100000                (number of iterations used)
-Compiler version    : GCC3.4.4              (Compiler and version)
-Compiler flags      : -O2                   (Compiler and linker flags)
-Memory location     : Code in flash, data in on chip RAM
-seedcrc             : 0xe9f5                (identifier for the input seeds)
-[0]crclist          : 0xe714                (validation for list part)
-[0]crcmatrix        : 0x1fd7                (validation for matrix part)
-[0]crcstate         : 0x8e3a                (validation for state part)
-[0]crcfinal         : 0x33ff                (iteration dependent output)
-Correct operation validated. See README.md for run and reporting rules.  (*Only when run is successful*)
-CoreMark 1.0 : 6508.490622 / GCC3.4.4 -O2 / Heap                          (*Only on a successful performance run*)
- -

Legal

See LICENSE.txt or the word document file under docs/LICENSE.doc.  For more information on your legal rights to use this benchmark, please see http://www.coremark.org/download/register.php?pg=register

- -

Credits

Many thanks to all of the individuals who helped with the development or testing of CoreMark including (Sorted by company name)

  • Alan Anderson, ADI
  • Adhikary Rajiv, ADI
  • Elena Stohr, ARM
  • Ian Rickards, ARM
  • Andrew Pickard, ARM
  • Trent Parker, CAVIUM
  • Shay Gal-On, EEMBC
  • Markus Levy, EEMBC
  • Ron Olson, IBM
  • Eyal Barzilay, MIPS
  • Jens Eltze, NEC
  • Hirohiko Ono, NEC
  • Ulrich Drees, NEC
  • Frank Roscheda, NEC
  • Rob Cosaro, NXP
  • Shumpei Kawasaki, RENESAS
- -
- - - - - - - - - - -
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
- - - - - - - - diff --git a/benchmarks/riscv-coremark/coremark/docs/html/files/release_notes-txt.html b/benchmarks/riscv-coremark/coremark/docs/html/files/release_notes-txt.html deleted file mode 100644 index 6658c7141..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/files/release_notes-txt.html +++ /dev/null @@ -1,56 +0,0 @@ - - -Release Notes - CoreMark - - - - - - - -

Release Notes

Version: 1.01

History

Version 1.01

  • Added validation testing the sizes of datatypes.

Version 1.00

  • First public version.

Validation

This release was tested on the following platforms

  • x86 cygwin and gcc 3.4 (Quad, dual and single core systems)
  • x86 linux (Ubuntu/Fedora) and gcc (4.2/4.1) (Quad and single core systems)
  • MIPS64 BE linux and gcc 3.4 16 cores system
  • MIPS32 BE linux with CodeSourcery compiler 4.2-177 on Malta/Linux with a 1004K 3-core system
  • PPC simulator with gcc 4.2.2 (No OS)
  • PPC 64b BE linux (yellowdog) with gcc 3.4 and 4.1 (Dual core system)
  • BF533 with VDSP50
  • Renesas R8C/H8 MCU with HEW 4.05
  • NXP LPC1700 armcc v4.0.0.524
  • NEC 78K with IAR v4.61
  • ARM simulator with armcc v4

Coverage

GCOV results can be found on SVN under cover.

Memory analysis

Valgrind 3.4.0 used and no errors reported.

Balance analysis

Number of instructions executed for each function tested with cachegrind and found balanced with gcc and -O0.

Statistics

Lines

Lines  Blank  Cmnts  Source     AESL
-=====  =====  =====  =====  ==========  =======================================
-  469     66    170    251       627.5  core_list_join.c  (C)
-  330     18     54    268       670.0  core_main.c  (C)
-  256     32     80    146       365.0  core_matrix.c  (C)
-  240     16     51    186       465.0  core_state.c  (C)
-  165     11     20    134       335.0  core_util.c  (C)
-  150     23     36     98       245.0  coremark.h  (C)
- 1610    166    411   1083      2707.5  ----- Benchmark -----  (6 files)
-  293     15     74    212       530.0  linux/core_portme.c  (C)
-  235     30    104    104       260.0  linux/core_portme.h  (C)
-  528     45    178    316       790.0  ----- Porting -----  (2 files)
-
-
-* For comparison, here are the stats for Dhrystone
-Lines  Blank  Cmnts  Source     AESL
-=====  =====  =====  =====  ==========  =======================================
-  311     15    242     54       135.0  dhry.h  (C)
-  789    132    119    553      1382.5  dhry_1.c  (C)
-  186     26     68    107       267.5  dhry_2.c  (C)
- 1286    173    429    714      1785.0  ----- C -----  (3 files)
- -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index.html b/benchmarks/riscv-coremark/coremark/docs/html/index.html deleted file mode 100644 index f7a88682a..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index.html +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/BuildTargets.html b/benchmarks/riscv-coremark/coremark/docs/html/index/BuildTargets.html deleted file mode 100644 index 635c0ff78..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/BuildTargets.html +++ /dev/null @@ -1,31 +0,0 @@ - - -Build Target Index - CoreMark - - - - - - - -
Build Target Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
P
 port_postbuild
 port_postload
 port_postrun
 port_prebuild
 port_preload
 port_prerun
- -
Generate any files that are needed after actual build end.
Do platform specific after load stuff.
Do platform specific after run stuff.
Generate any files that are needed before actual build starts.
Do platform specific before load stuff.
Do platform specific after run stuff.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Configuration.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Configuration.html deleted file mode 100644 index 8e5ef3aab..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Configuration.html +++ /dev/null @@ -1,51 +0,0 @@ - - -Configuration Index - CoreMark - - - - - - - -
Configuration Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
C
 CORE_TICKS
H
 HAS_FLOAT
 HAS_PRINTF
 HAS_STDIO
 HAS_TIME_H
M
 MAIN_HAS_NOARGC
 MAIN_HAS_NORETURN
 MEM_METHOD
 MULTITHREAD
S
 SEED_METHOD
T
 TOTAL_DATA_SIZE
U
 USE_CLOCK
 USE_FORK
 USE_PTHREAD
 USE_SOCKET
- -
Define type of return from the timing functions.
- - - -
Define to 1 if the platform supports floating point.
Define to 1 if the platform has stdio.h and implements the printf function.
Define to 1 if the platform has stdio.h.
Define to 1 if platform has the time.h header file, and implementation of functions thereof.
- - - -
Needed if platform does not support getting arguments to main.
Needed if platform does not support returning a value from main.
Defines method to get a block of memry.
Define for parallel execution
- - - -
Defines method to get seed values that cannot be computed at compile time.
- - - -
Define total size for data algorithms will operate on
- - - -
Define to 1 if platform has the time.h header file, and implementation of functions thereof.
Sample implementation for launching parallel contexts This implementation uses fork, waitpid, shmget,shmat and shmdt.
Sample implementation for launching parallel contexts This implementation uses pthread_thread_create and pthread_join.
Sample implementation for launching parallel contexts This implementation uses fork, socket, sendto and recvfrom
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Configurations.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Configurations.html deleted file mode 100644 index 0faee64a0..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Configurations.html +++ /dev/null @@ -1,45 +0,0 @@ - - -Configuration Index - - - - - - - - - -
Configuration Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
H
 HAS_FLOAT
 HAS_STDIO
 HAS_TIME_H
M
 MEM_METHOD
S
 SEED_METHOD
T
 TOTAL_DATA_SIZE
- -
Define to 1 if the platform supports floating point.
Define to 1 if the platform has stdio.h and implements the printf function.
Define to 1 if platform has the time.h header file, and implementation of functions thereof.
- - - -
Defines method to get a block of memry.
- - - -
Defines method to get seed values that cannot be computed at compile time.
- - - -
Define total size for data algorithms will operate on
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Files.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Files.html deleted file mode 100644 index 7e6d2fa04..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Files.html +++ /dev/null @@ -1,35 +0,0 @@ - - -File Index - CoreMark - - - - - - - -
File Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
C
 core_list_join.c
 core_main.c
 core_matrix.c
 core_portme.c
 core_portme.h
 core_portme.mak
 core_state.c
 core_util.c
 CoreMark
 coremark.h
R
 Release Notes
- -
This file contains the framework to acquire a block of memory, seed initial parameters, tun t he benchmark and report the results.
- - - -
Version: 1.01
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Functions.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Functions.html deleted file mode 100644 index a249d5186..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Functions.html +++ /dev/null @@ -1,55 +0,0 @@ - - -Function Index - CoreMark - - - - - - - -
Function Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
C
 cmp_complex
 cmp_idx
 core_bench_matrix
 core_bench_state
 core_init_state
 core_list_find
 core_list_init
 core_list_insert
 core_list_mergesort
 core_list_remove
 core_list_reverse
 core_list_undo_remove
 core_start_parallel
 core_state_transition
 core_stop_parallel
 crc*
G
 get_seed
 get_time
I
 iterate
M
 main
 matrix_add_const
 matrix_mul_const
 matrix_mul_matrix
 matrix_mul_matrix_bitextract
 matrix_mul_vect
 matrix_sum
 matrix_test
P
 portable_fini
 portable_free
 portable_init
 portable_malloc
S
 start_time
 stop_time
T
 time_in_secs
- -
ee_s32 cmp_complex(list_data *a,
list_data *b,
core_results *res)
Compare the data item in a list cell.
ee_s32 cmp_idx(list_data *a,
list_data *b,
core_results *res)
Compare the idx item in a list cell, and regen the data.
ee_u16 core_bench_matrix(mat_params *p,
ee_s16 seed,
ee_u16 crc)
Benchmark function
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc)
Benchmark function
void core_init_state(ee_u32 size,
ee_s16 seed,
ee_u8 *p)
Initialize the input data for the state machine.
list_head *core_list_find(list_head *list,
list_data *info)
Find an item in the list
list_head *core_list_init(ee_u32 blksize,
list_head *memblock,
ee_s16 seed)
Initialize list with data.
list_head *core_list_insert_new(list_head *insert_point,
list_data *info,
list_head **memblock,
list_data **datablock ,
list_head *memblock_end,
list_data *datablock_end)
Insert an item to the list
list_head *core_list_mergesort(list_head *list,
list_cmp cmp,
core_results *res)
Sort the list in place without recursion.
list_head *core_list_remove(list_head *item)
Remove an item from the list.
list_head *core_list_reverse(list_head *list)
Reverse a list
list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified)
Undo a remove operation.
Start benchmarking in a parallel context.
enum CORE_STATE core_state_transition(ee_u8 **instr ,
ee_u32 *transition_count)
Actual state machine.
Stop a parallel context execution of coremark, and gather the results.
Service functions to calculate 16b CRC code.
- - - -
Get a values that cannot be determined at compile time.
CORE_TICKS get_time(void)
Return an abstract “ticks” number that signifies time on the system.
- - - -
Run the benchmark for a specified number of iterations.
- - - -
#if MAIN_HAS_NOARGC MAIN_RETURN_TYPE main(void)
Main entry routine for the benchmark.
void matrix_add_const(ee_u32 N,
MATDAT *A,
MATDAT val)
Add a constant value to all elements of a matrix.
void matrix_mul_const(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT val)
Multiply a matrix by a constant.
void matrix_mul_matrix(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix.
void matrix_mul_matrix_bitextract(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix, and extract some bits from the result.
void matrix_mul_vect(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a vector.
ee_s16 matrix_sum(ee_u32 N,
MATRES *C,
MATDAT clipval)
Calculate a function that depends on the values of elements in the matrix.
ee_s16 matrix_test(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B,
MATDAT val)
Perform matrix manipulation.
- - - -
void portable_fini(core_portable *p)
Target specific final code
void portable_free(void *p)
Provide free() functionality in a platform specific way.
void portable_init(core_portable *p,
int *argc,
char *argv[])
Target specific initialization code Test for some common mistakes.
void *portable_malloc(size_t size)
Provide malloc() functionality in a platform specific way.
- - - -
void start_time(void)
This function will be called right before starting the timed portion of the benchmark.
void stop_time(void)
This function will be called right after ending the timed portion of the benchmark.
- - - -
secs_ret time_in_secs(CORE_TICKS ticks)
Convert the value returned by get_time to seconds.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/General.html b/benchmarks/riscv-coremark/coremark/docs/html/index/General.html deleted file mode 100644 index bd47b299f..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/General.html +++ /dev/null @@ -1,75 +0,0 @@ - - -Index - CoreMark - - - - - - - -
Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
B
 Build Targets
 Building and running
C
 CC
 CFLAGS
 cmp_complex
 cmp_idx
 Configuration
 core_bench_matrix
 core_bench_state
 core_init_state
 core_list_find
 core_list_init
 core_list_insert
 core_list_join.c
 core_list_mergesort
 core_list_remove
 core_list_reverse
 core_list_undo_remove
 core_main.c
 core_matrix.c
 core_portme.c
 core_portme.h
 core_portme.mak
 core_start_parallel
 core_state.c
 core_state_transition
 core_stop_parallel
 CORE_TICKS
 core_util.c
 CoreMark
 coremark.h
 crc*
 Credits
D
 default_num_contexts
 Description
 Documentation
F
 Functions
G
 get_seed
 get_time
H
 HAS_FLOAT
 HAS_PRINTF
 HAS_STDIO
 HAS_TIME_H
I
 iterate
L
 Legal
 LFLAGS_END
 LOAD
 Log File Format
M
 main
 MAIN_HAS_NOARGC
 MAIN_HAS_NORETURN
 matrix_add_const
 matrix_mul_const
 matrix_mul_matrix
 matrix_mul_matrix_bitextract
 matrix_mul_vect
 matrix_sum
 matrix_test
 MEM_METHOD
 MULTITHREAD
O
 OPATH
 OUTFLAG
P
 PERL
 PORT_OBJS
 port_postbuild
 port_postload
 port_postrun
 port_prebuild
 port_preload
 port_prerun
 PORT_SRCS
 portable_fini
 portable_free
 portable_init
 portable_malloc
R
 Release Notes
 Reporting rules
 RUN
 Run rules
- -
Download the release files from the www.coremark.org.
- - - -
Use this flag to define compiler to use
Use this flag to define compiler options.
ee_s32 cmp_complex(list_data *a,
list_data *b,
core_results *res)
Compare the data item in a list cell.
ee_s32 cmp_idx(list_data *a,
list_data *b,
core_results *res)
Compare the idx item in a list cell, and regen the data.
ee_u16 core_bench_matrix(mat_params *p,
ee_s16 seed,
ee_u16 crc)
Benchmark function
ee_u16 core_bench_state(ee_u32 blksize,
ee_u8 *memblock,
ee_s16 seed1,
ee_s16 seed2,
ee_s16 step,
ee_u16 crc)
Benchmark function
void core_init_state(ee_u32 size,
ee_s16 seed,
ee_u8 *p)
Initialize the input data for the state machine.
list_head *core_list_find(list_head *list,
list_data *info)
Find an item in the list
list_head *core_list_init(ee_u32 blksize,
list_head *memblock,
ee_s16 seed)
Initialize list with data.
list_head *core_list_insert_new(list_head *insert_point,
list_data *info,
list_head **memblock,
list_data **datablock ,
list_head *memblock_end,
list_data *datablock_end)
Insert an item to the list
list_head *core_list_mergesort(list_head *list,
list_cmp cmp,
core_results *res)
Sort the list in place without recursion.
list_head *core_list_remove(list_head *item)
Remove an item from the list.
list_head *core_list_reverse(list_head *list)
Reverse a list
list_head *core_list_undo_remove(list_head *item_removed,
list_head *item_modified)
Undo a remove operation.
This file contains the framework to acquire a block of memory, seed initial parameters, tun t he benchmark and report the results.
Start benchmarking in a parallel context.
enum CORE_STATE core_state_transition(ee_u8 **instr ,
ee_u32 *transition_count)
Actual state machine.
Stop a parallel context execution of coremark, and gather the results.
Define type of return from the timing functions.
Service functions to calculate 16b CRC code.
Many thanks to all of the individuals who helped with the development or testing of CoreMark including (Sorted by company name)
- - - -
extern ee_u32 default_num_contexts
Number of contexts to spawn in multicore context.
Benchmark using a linked list.
When you unpack the documentation (tar -vzxf coremark_version_docs.tgz) a docs folder will be created.
- - - - - - - -
Get a values that cannot be determined at compile time.
CORE_TICKS get_time(void)
Return an abstract “ticks” number that signifies time on the system.
- - - -
Define to 1 if the platform supports floating point.
Define to 1 if the platform has stdio.h and implements the printf function.
Define to 1 if the platform has stdio.h.
Define to 1 if platform has the time.h header file, and implementation of functions thereof.
- - - -
Run the benchmark for a specified number of iterations.
- - - -
See LICENSE.txt or the word document file under docs/LICENSE.doc.
Define any libraries needed for linking or other flags that should come at the end of the link line (e.g.
Define this flag if you need to load to a target, as in a cross compile environment.
The log files have the following format
- - - -
#if MAIN_HAS_NOARGC MAIN_RETURN_TYPE main(void)
Main entry routine for the benchmark.
Needed if platform does not support getting arguments to main.
Needed if platform does not support returning a value from main.
void matrix_add_const(ee_u32 N,
MATDAT *A,
MATDAT val)
Add a constant value to all elements of a matrix.
void matrix_mul_const(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT val)
Multiply a matrix by a constant.
void matrix_mul_matrix(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix.
void matrix_mul_matrix_bitextract(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a matrix, and extract some bits from the result.
void matrix_mul_vect(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B)
Multiply a matrix by a vector.
ee_s16 matrix_sum(ee_u32 N,
MATRES *C,
MATDAT clipval)
Calculate a function that depends on the values of elements in the matrix.
ee_s16 matrix_test(ee_u32 N,
MATRES *C,
MATDAT *A,
MATDAT *B,
MATDAT val)
Perform matrix manipulation.
Defines method to get a block of memry.
Define for parallel execution
- - - -
Use this flag to define how to to get an executable (e.g -o)
- - - -
Define perl executable to calculate the geomean if running separate.
Port specific object files can be added here
Generate any files that are needed after actual build end.
Do platform specific after load stuff.
Do platform specific after run stuff.
Generate any files that are needed before actual build starts.
Do platform specific before load stuff.
Do platform specific after run stuff.
Port specific source files can be added here
void portable_fini(core_portable *p)
Target specific final code
void portable_free(void *p)
Provide free() functionality in a platform specific way.
void portable_init(core_portable *p,
int *argc,
char *argv[])
Target specific initialization code Test for some common mistakes.
void *portable_malloc(size_t size)
Provide malloc() functionality in a platform specific way.
- - - -
Version: 1.01
How to report results on a data sheet?
Define this flag if running does not consist of simple invocation of the binary.
What is and is not allowed.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/General2.html b/benchmarks/riscv-coremark/coremark/docs/html/index/General2.html deleted file mode 100644 index 3852ab5aa..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/General2.html +++ /dev/null @@ -1,47 +0,0 @@ - - -Index - CoreMark - - - - - - - -
Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
S
 secs_ret
 SEED_METHOD
 SEPARATE_COMPILE
 start_time
 stop_time
 Submitting results
T
 time_in_secs
 TIMER_RES_DIVIDER
 TOTAL_DATA_SIZE
 Types
U
 USE_CLOCK
 USE_FORK
 USE_PTHREAD
 USE_SOCKET
V
 Variables
W
 Welcome
- -
For machines that have floating point support, get number of seconds as a double.
Defines method to get seed values that cannot be computed at compile time.
Define if you need to separate compilation from link stage.
void start_time(void)
This function will be called right before starting the timed portion of the benchmark.
void stop_time(void)
This function will be called right after ending the timed portion of the benchmark.
CoreMark results can be submitted on the web.
- - - -
secs_ret time_in_secs(CORE_TICKS ticks)
Convert the value returned by get_time to seconds.
Divider to trade off timer resolution and total time that can be measured.
Define total size for data algorithms will operate on
- - - -
Define to 1 if platform has the time.h header file, and implementation of functions thereof.
Sample implementation for launching parallel contexts This implementation uses fork, waitpid, shmget,shmat and shmdt.
Sample implementation for launching parallel contexts This implementation uses pthread_thread_create and pthread_join.
Sample implementation for launching parallel contexts This implementation uses fork, socket, sendto and recvfrom
- - - - - - - -
Copyright 2009 EEMBC All rights reserved.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Types.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Types.html deleted file mode 100644 index 1f4413653..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Types.html +++ /dev/null @@ -1,31 +0,0 @@ - - -Type Index - CoreMark - - - - - - - -
Type Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
S
 secs_ret
- -
For machines that have floating point support, get number of seconds as a double.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/index/Variables.html b/benchmarks/riscv-coremark/coremark/docs/html/index/Variables.html deleted file mode 100644 index 8c050daef..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/index/Variables.html +++ /dev/null @@ -1,55 +0,0 @@ - - -Variable Index - CoreMark - - - - - - - -
Variable Index
$#! · 0-9 · A · B · C · D · E · F · G · H · I · J · K · L · M · N · O · P · Q · R · S · T · U · V · W · X · Y · Z
C
 CC
 CFLAGS
D
 default_num_contexts
L
 LFLAGS_END
 LOAD
O
 OPATH
 OUTFLAG
P
 PERL
 PORT_OBJS
 PORT_SRCS
R
 RUN
S
 SEPARATE_COMPILE
- -
Use this flag to define compiler to use
Use this flag to define compiler options.
- - - -
extern ee_u32 default_num_contexts
Number of contexts to spawn in multicore context.
- - - -
Define any libraries needed for linking or other flags that should come at the end of the link line (e.g.
Define this flag if you need to load to a target, as in a cross compile environment.
- - - -
Use this flag to define how to to get an executable (e.g -o)
- - - -
Define perl executable to calculate the geomean if running separate.
Port specific object files can be added here
Port specific source files can be added here
- - - -
Define this flag if running does not consist of simple invocation of the binary.
- - - -
Define if you need to separate compilation from link stage.
- -
- - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/javascript/main.js b/benchmarks/riscv-coremark/coremark/docs/html/javascript/main.js deleted file mode 100644 index 91991f507..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/javascript/main.js +++ /dev/null @@ -1,836 +0,0 @@ -// This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure -// Natural Docs is licensed under the GPL - - -// -// Browser Styles -// ____________________________________________________________________________ - -var agt=navigator.userAgent.toLowerCase(); -var browserType; -var browserVer; - -if (agt.indexOf("opera") != -1) - { - browserType = "Opera"; - - if (agt.indexOf("opera 7") != -1 || agt.indexOf("opera/7") != -1) - { browserVer = "Opera7"; } - else if (agt.indexOf("opera 8") != -1 || agt.indexOf("opera/8") != -1) - { browserVer = "Opera8"; } - else if (agt.indexOf("opera 9") != -1 || agt.indexOf("opera/9") != -1) - { browserVer = "Opera9"; } - } - -else if (agt.indexOf("applewebkit") != -1) - { - browserType = "Safari"; - - if (agt.indexOf("version/3") != -1) - { browserVer = "Safari3"; } - else if (agt.indexOf("safari/4") != -1) - { browserVer = "Safari2"; } - } - -else if (agt.indexOf("khtml") != -1) - { - browserType = "Konqueror"; - } - -else if (agt.indexOf("msie") != -1) - { - browserType = "IE"; - - if (agt.indexOf("msie 6") != -1) - { browserVer = "IE6"; } - else if (agt.indexOf("msie 7") != -1) - { browserVer = "IE7"; } - } - -else if (agt.indexOf("gecko") != -1) - { - browserType = "Firefox"; - - if (agt.indexOf("rv:1.7") != -1) - { browserVer = "Firefox1"; } - else if (agt.indexOf("rv:1.8)") != -1 || agt.indexOf("rv:1.8.0") != -1) - { browserVer = "Firefox15"; } - else if (agt.indexOf("rv:1.8.1") != -1) - { browserVer = "Firefox2"; } - } - - -// -// Support Functions -// ____________________________________________________________________________ - - -function GetXPosition(item) 
- { - var position = 0; - - if (item.offsetWidth != null) - { - while (item != document.body && item != null) - { - position += item.offsetLeft; - item = item.offsetParent; - }; - }; - - return position; - }; - - -function GetYPosition(item) - { - var position = 0; - - if (item.offsetWidth != null) - { - while (item != document.body && item != null) - { - position += item.offsetTop; - item = item.offsetParent; - }; - }; - - return position; - }; - - -function MoveToPosition(item, x, y) - { - // Opera 5 chokes on the px extension, so it can use the Microsoft one instead. - - if (item.style.left != null) - { - item.style.left = x + "px"; - item.style.top = y + "px"; - } - else if (item.style.pixelLeft != null) - { - item.style.pixelLeft = x; - item.style.pixelTop = y; - }; - }; - - -// -// Menu -// ____________________________________________________________________________ - - -function ToggleMenu(id) - { - if (!window.document.getElementById) - { return; }; - - var display = window.document.getElementById(id).style.display; - - if (display == "none") - { display = "block"; } - else - { display = "none"; } - - window.document.getElementById(id).style.display = display; - } - -function HideAllBut(ids, max) - { - if (document.getElementById) - { - ids.sort( function(a,b) { return a - b; } ); - var number = 1; - - while (number < max) - { - if (ids.length > 0 && number == ids[0]) - { ids.shift(); } - else - { - document.getElementById("MGroupContent" + number).style.display = "none"; - }; - - number++; - }; - }; - } - - -// -// Tooltips -// ____________________________________________________________________________ - - -var tooltipTimer = 0; - -function ShowTip(event, tooltipID, linkID) - { - if (tooltipTimer) - { clearTimeout(tooltipTimer); }; - - var docX = event.clientX + window.pageXOffset; - var docY = event.clientY + window.pageYOffset; - - var showCommand = "ReallyShowTip('" + tooltipID + "', '" + linkID + "', " + docX + ", " + docY + ")"; - - tooltipTimer = 
setTimeout(showCommand, 1000); - } - -function ReallyShowTip(tooltipID, linkID, docX, docY) - { - tooltipTimer = 0; - - var tooltip; - var link; - - if (document.getElementById) - { - tooltip = document.getElementById(tooltipID); - link = document.getElementById(linkID); - } -/* else if (document.all) - { - tooltip = eval("document.all['" + tooltipID + "']"); - link = eval("document.all['" + linkID + "']"); - } -*/ - if (tooltip) - { - var left = GetXPosition(link); - var top = GetYPosition(link); - top += link.offsetHeight; - - - // The fallback method is to use the mouse X and Y relative to the document. We use a separate if and test if its a number - // in case some browser snuck through the above if statement but didn't support everything. - - if (!isFinite(top) || top == 0) - { - left = docX; - top = docY; - } - - // Some spacing to get it out from under the cursor. - - top += 10; - - // Make sure the tooltip doesnt get smushed by being too close to the edge, or in some browsers, go off the edge of the - // page. We do it here because Konqueror does get offsetWidth right even if it doesnt get the positioning right. - - if (tooltip.offsetWidth != null) - { - var width = tooltip.offsetWidth; - var docWidth = document.body.clientWidth; - - if (left + width > docWidth) - { left = docWidth - width - 1; } - - // If there's a horizontal scroll bar we could go past zero because it's using the page width, not the window width. 
- if (left < 0) - { left = 0; }; - } - - MoveToPosition(tooltip, left, top); - tooltip.style.visibility = "visible"; - } - } - -function HideTip(tooltipID) - { - if (tooltipTimer) - { - clearTimeout(tooltipTimer); - tooltipTimer = 0; - } - - var tooltip; - - if (document.getElementById) - { tooltip = document.getElementById(tooltipID); } - else if (document.all) - { tooltip = eval("document.all['" + tooltipID + "']"); } - - if (tooltip) - { tooltip.style.visibility = "hidden"; } - } - - -// -// Blockquote fix for IE -// ____________________________________________________________________________ - - -function NDOnLoad() - { - if (browserVer == "IE6") - { - var scrollboxes = document.getElementsByTagName('blockquote'); - - if (scrollboxes.item(0)) - { - NDDoResize(); - window.onresize=NDOnResize; - }; - }; - }; - - -var resizeTimer = 0; - -function NDOnResize() - { - if (resizeTimer != 0) - { clearTimeout(resizeTimer); }; - - resizeTimer = setTimeout(NDDoResize, 250); - }; - - -function NDDoResize() - { - var scrollboxes = document.getElementsByTagName('blockquote'); - - var i; - var item; - - i = 0; - while (item = scrollboxes.item(i)) - { - item.style.width = 100; - i++; - }; - - i = 0; - while (item = scrollboxes.item(i)) - { - item.style.width = item.parentNode.offsetWidth; - i++; - }; - - clearTimeout(resizeTimer); - resizeTimer = 0; - } - - - -/* ________________________________________________________________________________________________________ - - Class: SearchPanel - ________________________________________________________________________________________________________ - - A class handling everything associated with the search panel. - - Parameters: - - name - The name of the global variable that will be storing this instance. Is needed to be able to set timeouts. - mode - The mode the search is going to work in. Pass CommandLineOption()>, so the - value will be something like "HTML" or "FramedHTML". 
- - ________________________________________________________________________________________________________ -*/ - - -function SearchPanel(name, mode, resultsPath) - { - if (!name || !mode || !resultsPath) - { alert("Incorrect parameters to SearchPanel."); }; - - - // Group: Variables - // ________________________________________________________________________ - - /* - var: name - The name of the global variable that will be storing this instance of the class. - */ - this.name = name; - - /* - var: mode - The mode the search is going to work in, such as "HTML" or "FramedHTML". - */ - this.mode = mode; - - /* - var: resultsPath - The relative path from the current HTML page to the results page directory. - */ - this.resultsPath = resultsPath; - - /* - var: keyTimeout - The timeout used between a keystroke and when a search is performed. - */ - this.keyTimeout = 0; - - /* - var: keyTimeoutLength - The length of in thousandths of a second. - */ - this.keyTimeoutLength = 500; - - /* - var: lastSearchValue - The last search string executed, or an empty string if none. - */ - this.lastSearchValue = ""; - - /* - var: lastResultsPage - The last results page. The value is only relevant if is set. - */ - this.lastResultsPage = ""; - - /* - var: deactivateTimeout - - The timeout used between when a control is deactivated and when the entire panel is deactivated. Is necessary - because a control may be deactivated in favor of another control in the same panel, in which case it should stay - active. - */ - this.deactivateTimout = 0; - - /* - var: deactivateTimeoutLength - The length of in thousandths of a second. 
- */ - this.deactivateTimeoutLength = 200; - - - - - // Group: DOM Elements - // ________________________________________________________________________ - - - // Function: DOMSearchField - this.DOMSearchField = function() - { return document.getElementById("MSearchField"); }; - - // Function: DOMSearchType - this.DOMSearchType = function() - { return document.getElementById("MSearchType"); }; - - // Function: DOMPopupSearchResults - this.DOMPopupSearchResults = function() - { return document.getElementById("MSearchResults"); }; - - // Function: DOMPopupSearchResultsWindow - this.DOMPopupSearchResultsWindow = function() - { return document.getElementById("MSearchResultsWindow"); }; - - // Function: DOMSearchPanel - this.DOMSearchPanel = function() - { return document.getElementById("MSearchPanel"); }; - - - - - // Group: Event Handlers - // ________________________________________________________________________ - - - /* - Function: OnSearchFieldFocus - Called when focus is added or removed from the search field. - */ - this.OnSearchFieldFocus = function(isActive) - { - this.Activate(isActive); - }; - - - /* - Function: OnSearchFieldChange - Called when the content of the search field is changed. - */ - this.OnSearchFieldChange = function() - { - if (this.keyTimeout) - { - clearTimeout(this.keyTimeout); - this.keyTimeout = 0; - }; - - var searchValue = this.DOMSearchField().value.replace(/ +/g, ""); - - if (searchValue != this.lastSearchValue) - { - if (searchValue != "") - { - this.keyTimeout = setTimeout(this.name + ".Search()", this.keyTimeoutLength); - } - else - { - if (this.mode == "HTML") - { this.DOMPopupSearchResultsWindow().style.display = "none"; }; - this.lastSearchValue = ""; - }; - }; - }; - - - /* - Function: OnSearchTypeFocus - Called when focus is added or removed from the search type. 
- */ - this.OnSearchTypeFocus = function(isActive) - { - this.Activate(isActive); - }; - - - /* - Function: OnSearchTypeChange - Called when the search type is changed. - */ - this.OnSearchTypeChange = function() - { - var searchValue = this.DOMSearchField().value.replace(/ +/g, ""); - - if (searchValue != "") - { - this.Search(); - }; - }; - - - - // Group: Action Functions - // ________________________________________________________________________ - - - /* - Function: CloseResultsWindow - Closes the results window. - */ - this.CloseResultsWindow = function() - { - this.DOMPopupSearchResultsWindow().style.display = "none"; - this.Activate(false, true); - }; - - - /* - Function: Search - Performs a search. - */ - this.Search = function() - { - this.keyTimeout = 0; - - var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); - var searchTopic = this.DOMSearchType().value; - - var pageExtension = searchValue.substr(0,1); - - if (pageExtension.match(/^[a-z]/i)) - { pageExtension = pageExtension.toUpperCase(); } - else if (pageExtension.match(/^[0-9]/)) - { pageExtension = 'Numbers'; } - else - { pageExtension = "Symbols"; }; - - var resultsPage; - var resultsPageWithSearch; - var hasResultsPage; - - // indexSectionsWithContent is defined in searchdata.js - if (indexSectionsWithContent[searchTopic][pageExtension] == true) - { - resultsPage = this.resultsPath + '/' + searchTopic + pageExtension + '.html'; - resultsPageWithSearch = resultsPage+'?'+escape(searchValue); - hasResultsPage = true; - } - else - { - resultsPage = this.resultsPath + '/NoResults.html'; - resultsPageWithSearch = resultsPage; - hasResultsPage = false; - }; - - var resultsFrame; - if (this.mode == "HTML") - { resultsFrame = window.frames.MSearchResults; } - else if (this.mode == "FramedHTML") - { resultsFrame = window.top.frames['Content']; }; - - - if (resultsPage != this.lastResultsPage || - - // Bug in IE. 
If everything becomes hidden in a run, none of them will be able to be reshown in the next for some - // reason. It counts the right number of results, and you can even read the display as "block" after setting it, but it - // just doesn't work in IE 6 or IE 7. So if we're on the right page but the previous search had no results, reload the - // page anyway to get around the bug. - (browserType == "IE" && hasResultsPage && - (!resultsFrame.searchResults || resultsFrame.searchResults.lastMatchCount == 0)) ) - - { - resultsFrame.location.href = resultsPageWithSearch; - } - - // So if the results page is right and there's no IE bug, reperform the search on the existing page. We have to check if there - // are results because NoResults.html doesn't have any JavaScript, and it would be useless to do anything on that page even - // if it did. - else if (hasResultsPage) - { - // We need to check if this exists in case the frame is present but didn't finish loading. - if (resultsFrame.searchResults) - { resultsFrame.searchResults.Search(searchValue); } - - // Otherwise just reload instead of waiting. - else - { resultsFrame.location.href = resultsPageWithSearch; }; - }; - - - var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow(); - - if (this.mode == "HTML" && domPopupSearchResultsWindow.style.display != "block") - { - var domSearchType = this.DOMSearchType(); - - var left = GetXPosition(domSearchType); - var top = GetYPosition(domSearchType) + domSearchType.offsetHeight; - - MoveToPosition(domPopupSearchResultsWindow, left, top); - domPopupSearchResultsWindow.style.display = 'block'; - }; - - - this.lastSearchValue = searchValue; - this.lastResultsPage = resultsPage; - }; - - - - // Group: Activation Functions - // Functions that handle whether the entire panel is active or not. 
- // ________________________________________________________________________ - - - /* - Function: Activate - - Activates or deactivates the search panel, resetting things to their default values if necessary. You can call this on every - control's OnBlur() and it will handle not deactivating the entire panel when focus is just switching between them transparently. - - Parameters: - - isActive - Whether you're activating or deactivating the panel. - ignoreDeactivateDelay - Set if you're positive the action will deactivate the panel and thus want to skip the delay. - */ - this.Activate = function(isActive, ignoreDeactivateDelay) - { - // We want to ignore isActive being false while the results window is open. - if (isActive || (this.mode == "HTML" && this.DOMPopupSearchResultsWindow().style.display == "block")) - { - if (this.inactivateTimeout) - { - clearTimeout(this.inactivateTimeout); - this.inactivateTimeout = 0; - }; - - this.DOMSearchPanel().className = 'MSearchPanelActive'; - - var searchField = this.DOMSearchField(); - - if (searchField.value == 'Search') - { searchField.value = ""; } - } - else if (!ignoreDeactivateDelay) - { - this.inactivateTimeout = setTimeout(this.name + ".InactivateAfterTimeout()", this.inactivateTimeoutLength); - } - else - { - this.InactivateAfterTimeout(); - }; - }; - - - /* - Function: InactivateAfterTimeout - - Called by , which is set by . Inactivation occurs on a timeout because a control may - receive OnBlur() when focus is really transferring to another control in the search panel. In this case we don't want to - actually deactivate the panel because not only would that cause a visible flicker but it could also reset the search value. - So by doing it on a timeout instead, there's a short period where the second control's OnFocus() can cancel the deactivation. 
- */ - this.InactivateAfterTimeout = function() - { - this.inactivateTimeout = 0; - - this.DOMSearchPanel().className = 'MSearchPanelInactive'; - this.DOMSearchField().value = "Search"; - - this.lastSearchValue = ""; - this.lastResultsPage = ""; - }; - }; - - - - -/* ________________________________________________________________________________________________________ - - Class: SearchResults - _________________________________________________________________________________________________________ - - The class that handles everything on the search results page. - _________________________________________________________________________________________________________ -*/ - - -function SearchResults(name, mode) - { - /* - var: mode - The mode the search is going to work in, such as "HTML" or "FramedHTML". - */ - this.mode = mode; - - /* - var: lastMatchCount - The number of matches from the last run of . - */ - this.lastMatchCount = 0; - - - /* - Function: Toggle - Toggles the visibility of the passed element ID. - */ - this.Toggle = function(id) - { - if (this.mode == "FramedHTML") - { return; }; - - var parentElement = document.getElementById(id); - - var element = parentElement.firstChild; - - while (element && element != parentElement) - { - if (element.nodeName == 'DIV' && element.className == 'ISubIndex') - { - if (element.style.display == 'block') - { element.style.display = "none"; } - else - { element.style.display = 'block'; } - }; - - if (element.nodeName == 'DIV' && element.hasChildNodes()) - { element = element.firstChild; } - else if (element.nextSibling) - { element = element.nextSibling; } - else - { - do - { - element = element.parentNode; - } - while (element && element != parentElement && !element.nextSibling); - - if (element && element != parentElement) - { element = element.nextSibling; }; - }; - }; - }; - - - /* - Function: Search - - Searches for the passed string. If there is no parameter, it takes it from the URL query. 
- - Always returns true, since other documents may try to call it and that may or may not be possible. - */ - this.Search = function(search) - { - if (!search) - { - search = window.location.search; - search = search.substring(1); // Remove the leading ? - search = unescape(search); - }; - - search = search.replace(/^ +/, ""); - search = search.replace(/ +$/, ""); - search = search.toLowerCase(); - - if (search.match(/[^a-z0-9]/)) // Just a little speedup so it doesn't have to go through the below unnecessarily. - { - search = search.replace(/\_/g, "_und"); - search = search.replace(/\ +/gi, "_spc"); - search = search.replace(/\~/g, "_til"); - search = search.replace(/\!/g, "_exc"); - search = search.replace(/\@/g, "_att"); - search = search.replace(/\#/g, "_num"); - search = search.replace(/\$/g, "_dol"); - search = search.replace(/\%/g, "_pct"); - search = search.replace(/\^/g, "_car"); - search = search.replace(/\&/g, "_amp"); - search = search.replace(/\*/g, "_ast"); - search = search.replace(/\(/g, "_lpa"); - search = search.replace(/\)/g, "_rpa"); - search = search.replace(/\-/g, "_min"); - search = search.replace(/\+/g, "_plu"); - search = search.replace(/\=/g, "_equ"); - search = search.replace(/\{/g, "_lbc"); - search = search.replace(/\}/g, "_rbc"); - search = search.replace(/\[/g, "_lbk"); - search = search.replace(/\]/g, "_rbk"); - search = search.replace(/\:/g, "_col"); - search = search.replace(/\;/g, "_sco"); - search = search.replace(/\"/g, "_quo"); - search = search.replace(/\'/g, "_apo"); - search = search.replace(/\/g, "_ran"); - search = search.replace(/\,/g, "_com"); - search = search.replace(/\./g, "_per"); - search = search.replace(/\?/g, "_que"); - search = search.replace(/\//g, "_sla"); - search = search.replace(/[^a-z0-9\_]i/gi, "_zzz"); - }; - - var resultRows = document.getElementsByTagName("div"); - var matches = 0; - - var i = 0; - while (i < resultRows.length) - { - var row = resultRows.item(i); - - if (row.className == "SRResult") - 
{ - var rowMatchName = row.id.toLowerCase(); - rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); - - if (search.length <= rowMatchName.length && rowMatchName.substr(0, search.length) == search) - { - row.style.display = "block"; - matches++; - } - else - { row.style.display = "none"; }; - }; - - i++; - }; - - document.getElementById("Searching").style.display="none"; - - if (matches == 0) - { document.getElementById("NoMatches").style.display="block"; } - else - { document.getElementById("NoMatches").style.display="none"; } - - this.lastMatchCount = matches; - - return true; - }; - }; - diff --git a/benchmarks/riscv-coremark/coremark/docs/html/javascript/searchdata.js b/benchmarks/riscv-coremark/coremark/docs/html/javascript/searchdata.js deleted file mode 100644 index 901318e77..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/javascript/searchdata.js +++ /dev/null @@ -1,212 +0,0 @@ -var indexSectionsWithContent = { - "General": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": true, - "D": true, - "E": false, - "F": true, - "G": true, - "H": false, - "I": true, - "J": false, - "K": false, - "L": false, - "M": true, - "N": false, - "O": false, - "P": false, - "Q": false, - "R": false, - "S": true, - "T": true, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "Variables": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": true, - "D": true, - "E": false, - "F": false, - "G": false, - "H": false, - "I": false, - "J": false, - "K": false, - "L": true, - "M": false, - "N": false, - "O": true, - "P": true, - "Q": false, - "R": true, - "S": true, - "T": false, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "Functions": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": true, - "D": false, - "E": false, - "F": false, - "G": true, - "H": false, - "I": true, - "J": false, - "K": false, - "L": false, 
- "M": true, - "N": false, - "O": false, - "P": true, - "Q": false, - "R": false, - "S": true, - "T": true, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "Files": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": true, - "D": false, - "E": false, - "F": false, - "G": false, - "H": false, - "I": false, - "J": false, - "K": false, - "L": false, - "M": false, - "N": false, - "O": false, - "P": false, - "Q": false, - "R": true, - "S": false, - "T": false, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "Configuration": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": true, - "D": false, - "E": false, - "F": false, - "G": false, - "H": true, - "I": false, - "J": false, - "K": false, - "L": false, - "M": true, - "N": false, - "O": false, - "P": false, - "Q": false, - "R": false, - "S": true, - "T": true, - "U": true, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "Types": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": false, - "D": false, - "E": false, - "F": false, - "G": false, - "H": false, - "I": false, - "J": false, - "K": false, - "L": false, - "M": false, - "N": false, - "O": false, - "P": false, - "Q": false, - "R": false, - "S": true, - "T": false, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - }, - "BuildTargets": { - "Symbols": false, - "Numbers": false, - "A": false, - "B": false, - "C": false, - "D": false, - "E": false, - "F": false, - "G": false, - "H": false, - "I": false, - "J": false, - "K": false, - "L": false, - "M": false, - "N": false, - "O": false, - "P": true, - "Q": false, - "R": false, - "S": false, - "T": false, - "U": false, - "V": false, - "W": false, - "X": false, - "Y": false, - "Z": false - } - } \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/BuildTargetsP.html 
b/benchmarks/riscv-coremark/coremark/docs/html/search/BuildTargetsP.html deleted file mode 100644 index 65e741d65..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/BuildTargetsP.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationC.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationC.html deleted file mode 100644 index 84b49ca3a..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationC.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationH.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationH.html deleted file mode 100644 index 3b0c39213..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationH.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationM.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationM.html deleted file mode 100644 index 022606fa2..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationM.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationS.html deleted file mode 100644 index d26de19b9..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationS.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationT.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationT.html deleted file mode 100644 index 183daf1ee..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationT.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationU.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationU.html deleted file mode 100644 index d9b46a52d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationU.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsH.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsH.html deleted file mode 100644 index ade2ab757..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsH.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsM.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsM.html deleted file mode 100644 index baa189221..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsM.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsS.html deleted file mode 100644 index ceb8abf51..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsS.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsT.html b/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsT.html deleted file mode 100644 index ef138108f..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/ConfigurationsT.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FilesC.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FilesC.html deleted file mode 100644 index e2b01c4b1..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FilesC.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FilesR.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FilesR.html deleted file mode 100644 index 6202fb7c7..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FilesR.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsC.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsC.html deleted file mode 100644 index 43993db85..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsC.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsG.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsG.html deleted file mode 100644 index 217e8540b..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsG.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsI.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsI.html deleted file mode 100644 index f17354d65..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsI.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsM.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsM.html deleted file mode 100644 index 345e2ba83..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsM.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsP.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsP.html deleted file mode 100644 index c4b9d2dbe..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsP.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsS.html deleted file mode 100644 index 33dfa5fa4..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsS.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsT.html b/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsT.html deleted file mode 100644 index 65ae37ccd..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/FunctionsT.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralB.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralB.html deleted file mode 100644 index 66e27e49b..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralB.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralC.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralC.html deleted file mode 100644 index f1ac9d2d3..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralC.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralD.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralD.html deleted file mode 100644 index b3c21002d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralD.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralF.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralF.html deleted file mode 100644 index 126a24c57..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralF.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralG.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralG.html deleted file mode 100644 index 217e8540b..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralG.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralH.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralH.html deleted file mode 100644 index 3b0c39213..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralH.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralI.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralI.html deleted file mode 100644 index f17354d65..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralI.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralL.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralL.html deleted file mode 100644 index 22a700c93..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralL.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralM.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralM.html deleted file mode 100644 index 57f55b249..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralM.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralO.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralO.html deleted file mode 100644 index b14f18001..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralO.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralP.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralP.html deleted file mode 100644 index 063a6c13e..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralP.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralR.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralR.html deleted file mode 100644 index 24f33954e..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralR.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralS.html deleted file mode 100644 index a18c40715..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralS.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralT.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralT.html deleted file mode 100644 index a2fde7e28..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralT.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralU.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralU.html deleted file mode 100644 index d9b46a52d..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralU.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralV.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralV.html deleted file mode 100644 index 9c53066a5..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralV.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralW.html b/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralW.html deleted file mode 100644 index e22dcb062..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/GeneralW.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/NoResults.html b/benchmarks/riscv-coremark/coremark/docs/html/search/NoResults.html deleted file mode 100644 index 49e385959..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/NoResults.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - -
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/TypesS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/TypesS.html deleted file mode 100644 index 3d87649f5..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/TypesS.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesC.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesC.html deleted file mode 100644 index d3bdfef76..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesC.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesD.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesD.html deleted file mode 100644 index d4b961d3c..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesD.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesL.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesL.html deleted file mode 100644 index 09e4b9abc..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesL.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesO.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesO.html deleted file mode 100644 index b14f18001..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesO.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesP.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesP.html deleted file mode 100644 index c687999aa..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesP.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesR.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesR.html deleted file mode 100644 index 9cd771d25..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesR.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesS.html b/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesS.html deleted file mode 100644 index a1280a7d0..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/search/VariablesS.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - -
Loading...
Searching...
No Matches
\ No newline at end of file diff --git a/benchmarks/riscv-coremark/coremark/docs/html/styles/1.css b/benchmarks/riscv-coremark/coremark/docs/html/styles/1.css deleted file mode 100644 index d5a8bd6a2..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/styles/1.css +++ /dev/null @@ -1,767 +0,0 @@ -/* - IMPORTANT: If you're editing this file in the output directory of one of - your projects, your changes will be overwritten the next time you run - Natural Docs. Instead, copy this file to your project directory, make your - changes, and you can use it with -s. Even better would be to make a CSS - file in your project directory with only your changes, which you can then - use with -s [original style] [your changes]. - - On the other hand, if you're editing this file in the Natural Docs styles - directory, the changes will automatically be applied to all your projects - that use this style the next time Natural Docs is run on them. - - This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure - Natural Docs is licensed under the GPL -*/ - -body { - font: 10pt Verdana, Arial, sans-serif; - color: #000000; - margin: 0; padding: 0; - } - -.ContentPage, -.IndexPage, -.FramedMenuPage { - background-color: #E8E8E8; - } -.FramedContentPage, -.FramedIndexPage, -.FramedSearchResultsPage, -.PopupSearchResultsPage { - background-color: #FFFFFF; - } - - -a:link, -a:visited { color: #900000; text-decoration: none } -a:hover { color: #900000; text-decoration: underline } -a:active { color: #FF0000; text-decoration: underline } - -td { - vertical-align: top } - -img { border: 0; } - - -/* - Comment out this line to use web-style paragraphs (blank line between - paragraphs, no indent) instead of print-style paragraphs (no blank line, - indented.) -*/ -p { - text-indent: 5ex; margin: 0 } - - -/* Can't use something like display: none or it won't break. 
*/ -.HB { - font-size: 1px; - visibility: hidden; - } - -/* Blockquotes are used as containers for things that may need to scroll. */ -blockquote { - padding: 0; - margin: 0; - overflow: auto; - } - - -.Firefox1 blockquote { - padding-bottom: .5em; - } - -/* Turn off scrolling when printing. */ -@media print { - blockquote { - overflow: visible; - } - .IE blockquote { - width: auto; - } - } - - - -#Menu { - font-size: 9pt; - padding: 10px 0 0 0; - } -.ContentPage #Menu, -.IndexPage #Menu { - position: absolute; - top: 0; - left: 0; - width: 31ex; - overflow: hidden; - } -.ContentPage .Firefox #Menu, -.IndexPage .Firefox #Menu { - width: 27ex; - } - - - .MTitle { - font-size: 16pt; font-weight: bold; font-variant: small-caps; - text-align: center; - padding: 5px 10px 15px 10px; - border-bottom: 1px dotted #000000; - margin-bottom: 15px } - - .MSubTitle { - font-size: 9pt; font-weight: normal; font-variant: normal; - margin-top: 1ex; margin-bottom: 5px } - - - .MEntry a:link, - .MEntry a:hover, - .MEntry a:visited { color: #606060; margin-right: 0 } - .MEntry a:active { color: #A00000; margin-right: 0 } - - - .MGroup { - font-variant: small-caps; font-weight: bold; - margin: 1em 0 1em 10px; - } - - .MGroupContent { - font-variant: normal; font-weight: normal } - - .MGroup a:link, - .MGroup a:hover, - .MGroup a:visited { color: #545454; margin-right: 10px } - .MGroup a:active { color: #A00000; margin-right: 10px } - - - .MFile, - .MText, - .MLink, - .MIndex { - padding: 1px 17px 2px 10px; - margin: .25em 0 .25em 0; - } - - .MText { - font-size: 8pt; font-style: italic } - - .MLink { - font-style: italic } - - #MSelected { - color: #000000; background-color: #FFFFFF; - /* Replace padding with border. */ - padding: 0 10px 0 10px; - border-width: 1px 2px 2px 0; border-style: solid; border-color: #000000; - margin-right: 5px; - } - - /* Close off the left side when its in a group. 
*/ - .MGroup #MSelected { - padding-left: 9px; border-left-width: 1px } - - /* A treat for Mozilla users. Blatantly non-standard. Will be replaced with CSS 3 attributes when finalized/supported. */ - .Firefox #MSelected { - -moz-border-radius-topright: 10px; - -moz-border-radius-bottomright: 10px } - .Firefox .MGroup #MSelected { - -moz-border-radius-topleft: 10px; - -moz-border-radius-bottomleft: 10px } - - - #MSearchPanel { - padding: 0px 6px; - margin: .25em 0; - } - - - #MSearchField { - font: italic 9pt Verdana, sans-serif; - color: #606060; - background-color: #E8E8E8; - border: none; - padding: 2px 4px; - width: 100%; - } - /* Only Opera gets it right. */ - .Firefox #MSearchField, - .IE #MSearchField, - .Safari #MSearchField { - width: 94%; - } - .Opera9 #MSearchField, - .Konqueror #MSearchField { - width: 97%; - } - .FramedMenuPage .Firefox #MSearchField, - .FramedMenuPage .Safari #MSearchField, - .FramedMenuPage .Konqueror #MSearchField { - width: 98%; - } - - /* Firefox doesn't do this right in frames without #MSearchPanel added on. - It's presence doesn't hurt anything other browsers. */ - #MSearchPanel.MSearchPanelInactive:hover #MSearchField { - background-color: #FFFFFF; - border: 1px solid #C0C0C0; - padding: 1px 3px; - } - .MSearchPanelActive #MSearchField { - background-color: #FFFFFF; - border: 1px solid #C0C0C0; - font-style: normal; - padding: 1px 3px; - } - - #MSearchType { - visibility: hidden; - font: 8pt Verdana, sans-serif; - width: 98%; - padding: 0; - border: 1px solid #C0C0C0; - } - .MSearchPanelActive #MSearchType, - /* As mentioned above, Firefox doesn't do this right in frames without #MSearchPanel added on. 
*/ - #MSearchPanel.MSearchPanelInactive:hover #MSearchType, - #MSearchType:focus { - visibility: visible; - color: #606060; - } - #MSearchType option#MSearchEverything { - font-weight: bold; - } - - .Opera8 .MSearchPanelInactive:hover, - .Opera8 .MSearchPanelActive { - margin-left: -1px; - } - - - iframe#MSearchResults { - width: 60ex; - height: 15em; - } - #MSearchResultsWindow { - display: none; - position: absolute; - left: 0; top: 0; - border: 1px solid #000000; - background-color: #E8E8E8; - } - #MSearchResultsWindowClose { - font-weight: bold; - font-size: 8pt; - display: block; - padding: 2px 5px; - } - #MSearchResultsWindowClose:link, - #MSearchResultsWindowClose:visited { - color: #000000; - text-decoration: none; - } - #MSearchResultsWindowClose:active, - #MSearchResultsWindowClose:hover { - color: #800000; - text-decoration: none; - background-color: #F4F4F4; - } - - - - -#Content { - padding-bottom: 15px; - } - -.ContentPage #Content { - border-width: 0 0 1px 1px; - border-style: solid; - border-color: #000000; - background-color: #FFFFFF; - font-size: 9pt; /* To make 31ex match the menu's 31ex. 
*/ - margin-left: 31ex; - } -.ContentPage .Firefox #Content { - margin-left: 27ex; - } - - - - .CTopic { - font-size: 10pt; - margin-bottom: 3em; - } - - - .CTitle { - font-size: 12pt; font-weight: bold; - border-width: 0 0 1px 0; border-style: solid; border-color: #A0A0A0; - margin: 0 15px .5em 15px } - - .CGroup .CTitle { - font-size: 16pt; font-variant: small-caps; - padding-left: 15px; padding-right: 15px; - border-width: 0 0 2px 0; border-color: #000000; - margin-left: 0; margin-right: 0 } - - .CClass .CTitle, - .CInterface .CTitle, - .CDatabase .CTitle, - .CDatabaseTable .CTitle, - .CSection .CTitle { - font-size: 18pt; - color: #FFFFFF; background-color: #A0A0A0; - padding: 10px 15px 10px 15px; - border-width: 2px 0; border-color: #000000; - margin-left: 0; margin-right: 0 } - - #MainTopic .CTitle { - font-size: 20pt; - color: #FFFFFF; background-color: #7070C0; - padding: 10px 15px 10px 15px; - border-width: 0 0 3px 0; border-color: #000000; - margin-left: 0; margin-right: 0 } - - .CBody { - margin-left: 15px; margin-right: 15px } - - - .CToolTip { - position: absolute; visibility: hidden; - left: 0; top: 0; - background-color: #FFFFE0; - padding: 5px; - border-width: 1px 2px 2px 1px; border-style: solid; border-color: #000000; - font-size: 8pt; - } - - .Opera .CToolTip { - max-width: 98%; - } - - /* Scrollbars would be useless. */ - .CToolTip blockquote { - overflow: hidden; - } - .IE6 .CToolTip blockquote { - overflow: visible; - } - - .CHeading { - font-weight: bold; font-size: 10pt; - margin: 1.5em 0 .5em 0; - } - - .CBody pre { - font: 10pt "Courier New", Courier, monospace; - margin: 1em 0; - } - - .CBody ul { - /* I don't know why CBody's margin doesn't apply, but it's consistent across browsers so whatever. - Reapply it here as padding. 
*/ - padding-left: 15px; padding-right: 15px; - margin: .5em 5ex .5em 5ex; - } - - .CDescriptionList { - margin: .5em 5ex 0 5ex } - - .CDLEntry { - font: 10pt "Courier New", Courier, monospace; color: #808080; - padding-bottom: .25em; - white-space: nowrap } - - .CDLDescription { - font-size: 10pt; /* For browsers that don't inherit correctly, like Opera 5. */ - padding-bottom: .5em; padding-left: 5ex } - - - .CTopic img { - text-align: center; - display: block; - margin: 1em auto; - } - .CImageCaption { - font-variant: small-caps; - font-size: 8pt; - color: #808080; - text-align: center; - position: relative; - top: 1em; - } - - .CImageLink { - color: #808080; - font-style: italic; - } - a.CImageLink:link, - a.CImageLink:visited, - a.CImageLink:hover { color: #808080 } - - - - - -.Prototype { - font: 10pt "Courier New", Courier, monospace; - padding: 5px 3ex; - border-width: 1px; border-style: solid; - margin: 0 5ex 1.5em 5ex; - } - - .Prototype td { - font-size: 10pt; - } - - .PDefaultValue, - .PDefaultValuePrefix, - .PTypePrefix { - color: #8F8F8F; - } - .PTypePrefix { - text-align: right; - } - .PAfterParameters { - vertical-align: bottom; - } - - .IE .Prototype table { - padding: 0; - } - - .CFunction .Prototype { - background-color: #F4F4F4; border-color: #D0D0D0 } - .CProperty .Prototype { - background-color: #F4F4FF; border-color: #C0C0E8 } - .CVariable .Prototype { - background-color: #FFFFF0; border-color: #E0E0A0 } - - .CClass .Prototype { - border-width: 1px 2px 2px 1px; border-style: solid; border-color: #A0A0A0; - background-color: #F4F4F4; - } - .CInterface .Prototype { - border-width: 1px 2px 2px 1px; border-style: solid; border-color: #A0A0D0; - background-color: #F4F4FF; - } - - .CDatabaseIndex .Prototype, - .CConstant .Prototype { - background-color: #D0D0D0; border-color: #000000 } - .CType .Prototype, - .CEnumeration .Prototype { - background-color: #FAF0F0; border-color: #E0B0B0; - } - .CDatabaseTrigger .Prototype, - .CEvent .Prototype, - 
.CDelegate .Prototype { - background-color: #F0FCF0; border-color: #B8E4B8 } - - .CToolTip .Prototype { - margin: 0 0 .5em 0; - white-space: nowrap; - } - - - - - -.Summary { - margin: 1.5em 5ex 0 5ex } - - .STitle { - font-size: 12pt; font-weight: bold; - margin-bottom: .5em } - - - .SBorder { - background-color: #FFFFF0; - padding: 15px; - border: 1px solid #C0C060 } - - /* In a frame IE 6 will make them too long unless you set the width to 100%. Without frames it will be correct without a width - or slightly too long (but not enough to scroll) with a width. This arbitrary weirdness simply astounds me. IE 7 has the same - problem with frames, haven't tested it without. */ - .FramedContentPage .IE .SBorder { - width: 100% } - - /* A treat for Mozilla users. Blatantly non-standard. Will be replaced with CSS 3 attributes when finalized/supported. */ - .Firefox .SBorder { - -moz-border-radius: 20px } - - - .STable { - font-size: 9pt; width: 100% } - - .SEntry { - width: 30% } - .SDescription { - width: 70% } - - - .SMarked { - background-color: #F8F8D8 } - - .SDescription { padding-left: 2ex } - .SIndent1 .SEntry { padding-left: 1.5ex } .SIndent1 .SDescription { padding-left: 3.5ex } - .SIndent2 .SEntry { padding-left: 3.0ex } .SIndent2 .SDescription { padding-left: 5.0ex } - .SIndent3 .SEntry { padding-left: 4.5ex } .SIndent3 .SDescription { padding-left: 6.5ex } - .SIndent4 .SEntry { padding-left: 6.0ex } .SIndent4 .SDescription { padding-left: 8.0ex } - .SIndent5 .SEntry { padding-left: 7.5ex } .SIndent5 .SDescription { padding-left: 9.5ex } - - .SDescription a { color: #800000} - .SDescription a:active { color: #A00000 } - - .SGroup td { - padding-top: .5em; padding-bottom: .25em } - - .SGroup .SEntry { - font-weight: bold; font-variant: small-caps } - - .SGroup .SEntry a { color: #800000 } - .SGroup .SEntry a:active { color: #F00000 } - - - .SMain td, - .SClass td, - .SDatabase td, - .SDatabaseTable td, - .SSection td { - font-size: 10pt; - padding-bottom: .25em 
} - - .SClass td, - .SDatabase td, - .SDatabaseTable td, - .SSection td { - padding-top: 1em } - - .SMain .SEntry, - .SClass .SEntry, - .SDatabase .SEntry, - .SDatabaseTable .SEntry, - .SSection .SEntry { - font-weight: bold; - } - - .SMain .SEntry a, - .SClass .SEntry a, - .SDatabase .SEntry a, - .SDatabaseTable .SEntry a, - .SSection .SEntry a { color: #000000 } - - .SMain .SEntry a:active, - .SClass .SEntry a:active, - .SDatabase .SEntry a:active, - .SDatabaseTable .SEntry a:active, - .SSection .SEntry a:active { color: #A00000 } - - - - - -.ClassHierarchy { - margin: 0 15px 1em 15px } - - .CHEntry { - border-width: 1px 2px 2px 1px; border-style: solid; border-color: #A0A0A0; - margin-bottom: 3px; - padding: 2px 2ex; - font-size: 10pt; - background-color: #F4F4F4; color: #606060; - } - - .Firefox .CHEntry { - -moz-border-radius: 4px; - } - - .CHCurrent .CHEntry { - font-weight: bold; - border-color: #000000; - color: #000000; - } - - .CHChildNote .CHEntry { - font-style: italic; - font-size: 8pt; - } - - .CHIndent { - margin-left: 3ex; - } - - .CHEntry a:link, - .CHEntry a:visited, - .CHEntry a:hover { - color: #606060; - } - .CHEntry a:active { - color: #800000; - } - - - - - -#Index { - background-color: #FFFFFF; - } - -/* As opposed to .PopupSearchResultsPage #Index */ -.IndexPage #Index, -.FramedIndexPage #Index, -.FramedSearchResultsPage #Index { - padding: 15px; - } - -.IndexPage #Index { - border-width: 0 0 1px 1px; - border-style: solid; - border-color: #000000; - font-size: 9pt; /* To make 27ex match the menu's 27ex. 
*/ - margin-left: 27ex; - } - - - .IPageTitle { - font-size: 20pt; font-weight: bold; - color: #FFFFFF; background-color: #7070C0; - padding: 10px 15px 10px 15px; - border-width: 0 0 3px 0; border-color: #000000; border-style: solid; - margin: -15px -15px 0 -15px } - - .FramedSearchResultsPage .IPageTitle { - margin-bottom: 15px; - } - - .INavigationBar { - font-size: 10pt; - text-align: center; - background-color: #FFFFF0; - padding: 5px; - border-bottom: solid 1px black; - margin: 0 -15px 15px -15px; - } - - .INavigationBar a { - font-weight: bold } - - .IHeading { - font-size: 16pt; font-weight: bold; - padding: 2.5em 0 .5em 0; - text-align: center; - width: 3.5ex; - } - #IFirstHeading { - padding-top: 0; - } - - .IEntry { - font-size: 10pt; - padding-left: 1ex; - } - .PopupSearchResultsPage .IEntry { - font-size: 8pt; - padding: 1px 5px; - } - .PopupSearchResultsPage .Opera9 .IEntry, - .FramedSearchResultsPage .Opera9 .IEntry { - text-align: left; - } - .FramedSearchResultsPage .IEntry { - padding: 0; - } - - .ISubIndex { - padding-left: 3ex; padding-bottom: .5em } - .PopupSearchResultsPage .ISubIndex { - display: none; - } - - /* While it may cause some entries to look like links when they aren't, I found it's much easier to read the - index if everything's the same color. 
*/ - .ISymbol { - font-weight: bold; color: #900000 } - - .IndexPage .ISymbolPrefix, - .FramedIndexPage .ISymbolPrefix { - font-size: 10pt; - text-align: right; - color: #C47C7C; - background-color: #F8F8F8; - border-right: 3px solid #E0E0E0; - border-left: 1px solid #E0E0E0; - padding: 0 1px 0 2px; - } - .PopupSearchResultsPage .ISymbolPrefix, - .FramedSearchResultsPage .ISymbolPrefix { - color: #900000; - } - .PopupSearchResultsPage .ISymbolPrefix { - font-size: 8pt; - } - - .IndexPage #IFirstSymbolPrefix, - .FramedIndexPage #IFirstSymbolPrefix { - border-top: 1px solid #E0E0E0; - } - .IndexPage #ILastSymbolPrefix, - .FramedIndexPage #ILastSymbolPrefix { - border-bottom: 1px solid #E0E0E0; - } - .IndexPage #IOnlySymbolPrefix, - .FramedIndexPage #IOnlySymbolPrefix { - border-top: 1px solid #E0E0E0; - border-bottom: 1px solid #E0E0E0; - } - - a.IParent, - a.IFile { - display: block; - } - - .PopupSearchResultsPage .SRStatus { - padding: 2px 5px; - font-size: 8pt; - font-style: italic; - } - .FramedSearchResultsPage .SRStatus { - font-size: 10pt; - font-style: italic; - } - - .SRResult { - display: none; - } - - - -#Footer { - font-size: 8pt; - color: #989898; - text-align: right; - } - -#Footer p { - text-indent: 0; - margin-bottom: .5em; - } - -.ContentPage #Footer, -.IndexPage #Footer { - text-align: right; - margin: 2px; - } - -.FramedMenuPage #Footer { - text-align: center; - margin: 5em 10px 10px 10px; - padding-top: 1em; - border-top: 1px solid #C8C8C8; - } - - #Footer a:link, - #Footer a:hover, - #Footer a:visited { color: #989898 } - #Footer a:active { color: #A00000 } - diff --git a/benchmarks/riscv-coremark/coremark/docs/html/styles/2.css b/benchmarks/riscv-coremark/coremark/docs/html/styles/2.css deleted file mode 100644 index 69a1d1a7a..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/styles/2.css +++ /dev/null @@ -1,6 +0,0 @@ -#Menu { - padding: 48px 0 0 0; - background: url(file:../../coremark_logo.jpg) no-repeat; - background-position: 
30px 10px; - } - diff --git a/benchmarks/riscv-coremark/coremark/docs/html/styles/main.css b/benchmarks/riscv-coremark/coremark/docs/html/styles/main.css deleted file mode 100644 index a672a9492..000000000 --- a/benchmarks/riscv-coremark/coremark/docs/html/styles/main.css +++ /dev/null @@ -1,2 +0,0 @@ -@import URL("1.css"); -@import URL("2.css"); diff --git a/benchmarks/riscv-coremark/coremark/freebsd/core_portme.mak b/benchmarks/riscv-coremark/coremark/freebsd/core_portme.mak deleted file mode 100644 index 97b6d6ace..000000000 --- a/benchmarks/riscv-coremark/coremark/freebsd/core_portme.mak +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/coremark/linux/core_portme.c b/benchmarks/riscv-coremark/coremark/linux/core_portme.c deleted file mode 100755 index 6b63610d1..000000000 --- a/benchmarks/riscv-coremark/coremark/linux/core_portme.c +++ /dev/null @@ -1,338 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. -*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. 
- - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. - */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - { - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/coremark/linux/core_portme.h b/benchmarks/riscv-coremark/coremark/linux/core_portme.h deleted file mode 100755 index 2cf4659a4..000000000 --- a/benchmarks/riscv-coremark/coremark/linux/core_portme.h +++ /dev/null @@ -1,290 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" -#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. 
gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -/* align_mem: - This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. 
- - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. 
- - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/linux/core_portme.mak b/benchmarks/riscv-coremark/coremark/linux/core_portme.mak deleted file mode 100644 index 97b6d6ace..000000000 --- a/benchmarks/riscv-coremark/coremark/linux/core_portme.mak +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/coremark/linux64/core_portme.c b/benchmarks/riscv-coremark/coremark/linux64/core_portme.c deleted file mode 100755 index fe8d29983..000000000 --- a/benchmarks/riscv-coremark/coremark/linux64/core_portme.c +++ /dev/null @@ -1,336 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. 
-*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/coremark/linux64/core_portme.h b/benchmarks/riscv-coremark/coremark/linux64/core_portme.h deleted file mode 100755 index 1228a679b..000000000 --- a/benchmarks/riscv-coremark/coremark/linux64/core_portme.h +++ /dev/null @@ -1,291 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" 
-#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; -typedef size_t ee_size_t; -/* align an offset to point to a 32b value */ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. 
- - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. 
- 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/linux64/core_portme.mak b/benchmarks/riscv-coremark/coremark/linux64/core_portme.mak deleted file mode 100755 index 5cfabee32..000000000 --- a/benchmarks/riscv-coremark/coremark/linux64/core_portme.mak +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -#File: core_portme.mak - -# Flag: OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag: CC -# Use this flag to define compiler to use -CC = gcc -# Flag: CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O2 -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag: LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += -lrt -# Flag: PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = $(PORT_DIR)/core_portme.c -# Flag: LOAD -# Define this flag if you need to load to a target, as in a cross compile environment. - -# Flag: RUN -# Define this flag if running does not consist of simple invocation of the binary. -# In a cross compile environment, you need to define this. - -#For flashing and using a tera term macro, you could use -#LOAD = flash ADDR -#RUN = ttpmacro coremark.ttl - -#For copying to target and executing via SSH connection, you could use -#LOAD = scp $(OUTFILE) user@target:~ -#RUN = ssh user@target -c - -#For native compilation and execution -LOAD = echo Loading done -RUN = - -OEXT = .o -EXE = .exe - -# Flag: SEPARATE_COMPILE -# Define if you need to separate compilation from link stage. -# In this case, you also need to define below how to create an object file, and how to link. 
-ifdef SEPARATE_COMPILE - -LD = gcc -OBJOUT = -o -LFLAGS = -OFLAG = -o -COUT = -c -# Flag: PORT_OBJS -# Port specific object files can be added here -PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT) -PORT_CLEAN = *$(OEXT) - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -endif - -# Target: port_prebuild -# Generate any files that are needed before actual build starts. -# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1 -# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line. -# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it. -# Note - Using REBUILD=1 -# -# Use make PGO=1 to invoke this sample processing. - -ifdef PGO - ifeq (,$(findstring $(PGO),gen)) - PGO_STAGE=build_pgo_gcc - CFLAGS+=-fprofile-use - endif - PORT_CLEAN+=*.gcda *.gcno gmon.out -endif - -.PHONY: port_prebuild -port_prebuild: $(PGO_STAGE) - -.PHONY: build_pgo_gcc -build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 - -# Target: port_postbuild -# Generate any files that are needed after actual build end. -# E.g. change format to srec, bin, zip in order to be able to load into flash -.PHONY: port_postbuild -port_postbuild: - -# Target: port_postrun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_postrun -port_postrun: - -# Target: port_prerun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_prerun -port_prerun: - -# Target: port_postload -# Do platform specific after load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_postload -port_postload: - -# Target: port_preload -# Do platform specific before load stuff. -# E.g. 
reset the reset power to the flash eraser -.PHONY: port_preload -port_preload: - -# FLAG: OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - -# FLAG: PERL -# Define perl executable to calculate the geomean if running separate. -PERL=/usr/bin/perl diff --git a/benchmarks/riscv-coremark/coremark/macos/core_portme.mak b/benchmarks/riscv-coremark/coremark/macos/core_portme.mak deleted file mode 100644 index 6b27c3c41..000000000 --- a/benchmarks/riscv-coremark/coremark/macos/core_portme.mak +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -NO_LIBRT = 1 -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/coremark/posix/core_portme.c b/benchmarks/riscv-coremark/coremark/posix/core_portme.c deleted file mode 100644 index f5a7f5b3d..000000000 --- a/benchmarks/riscv-coremark/coremark/posix/core_portme.c +++ /dev/null @@ -1,419 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD == MEM_MALLOC) -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void * -portable_malloc(size_t size) -{ - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. -*/ -void -portable_free(void *p) -{ - free(p); -} -#else -void * -portable_malloc(size_t size) -{ - return NULL; -} -void -portable_free(void *p) -{ - p = NULL; -} -#endif - -#if (SEED_METHOD == SEED_VOLATILE) -#if VALIDATION_RUN -volatile ee_s32 seed1_volatile = 0x3415; -volatile ee_s32 seed2_volatile = 0x3415; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PERFORMANCE_RUN -volatile ee_s32 seed1_volatile = 0x0; -volatile ee_s32 seed2_volatile = 0x0; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PROFILE_RUN -volatile ee_s32 seed1_volatile = 0x8; -volatile ee_s32 seed2_volatile = 0x8; -volatile ee_s32 seed3_volatile = 0x8; -#endif -volatile ee_s32 seed4_volatile = ITERATIONS; -volatile ee_s32 seed5_volatile = 0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is - supported by the platform. e.g. Read value from on board RTC, read value from - cpu clock cycles performance counter etc. Sample implementation for standard - time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be - measured. 
- - Use lower values to increase resolution, but make sure that overflow - does not occur. If there are issues with the return value overflowing, - increase this value. - */ -#if USE_CLOCK -#define NSECS_PER_SEC CLOCKS_PER_SEC -#define EE_TIMER_TICKER_RATE 1000 -#define CORETIMETYPE clock_t -#define GETMYTIME(_t) (*_t = clock()) -#define MYTIMEDIFF(fin, ini) ((fin) - (ini)) -#define TIMER_RES_DIVIDER 1 -#define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) -#define NSECS_PER_SEC 10000000 -#define EE_TIMER_TICKER_RATE 1000 -#define CORETIMETYPE FILETIME -#define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) -#define MYTIMEDIFF(fin, ini) \ - (((*(__int64 *)&fin) - (*(__int64 *)&ini)) / TIMER_RES_DIVIDER) -/* setting to millisces resolution by default with MSDEV */ -#ifndef TIMER_RES_DIVIDER -#define TIMER_RES_DIVIDER 1000 -#endif -#define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H -#define NSECS_PER_SEC 1000000000 -#define EE_TIMER_TICKER_RATE 1000 -#define CORETIMETYPE struct timespec -#define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME, _t) -#define MYTIMEDIFF(fin, ini) \ - ((fin.tv_sec - ini.tv_sec) * (NSECS_PER_SEC / TIMER_RES_DIVIDER) \ - + (fin.tv_nsec - ini.tv_nsec) / TIMER_RES_DIVIDER) -/* setting to 1/1000 of a second resolution by default with linux */ -#ifndef TIMER_RES_DIVIDER -#define TIMER_RES_DIVIDER 1000000 -#endif -#define SAMPLE_TIME_IMPLEMENTATION 1 -#else -#define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of - the benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or zeroing some system parameters - e.g. setting the cpu clocks - cycles to 0. 
-*/ -void -start_time(void) -{ - GETMYTIME(&start_time_val); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3"); /*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the - benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or other system parameters - e.g. reading the current value of - cpu cycles counter. -*/ -void -stop_time(void) -{ -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3"); /*1 */ -#endif - GETMYTIME(&stop_time_val); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other - value, as long as it can be converted to seconds by . This - methodology is taken to accomodate any hardware or simulated platform. The - sample implementation returns millisecs by default, and the resolution is - controlled by -*/ -CORE_TICKS -get_time(void) -{ - CORE_TICKS elapsed - = (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for - floating point. Default implementation implemented by the EE_TICKS_PER_SEC - macro above. -*/ -secs_ret -time_in_secs(CORE_TICKS ticks) -{ - secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts = MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void -portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i = 0; i < *argc; i++) - { - ee_printf("Arg[%d]=%s\n", i, argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) - { - ee_printf( - "ERROR! Please define ee_ptr_int to a type that holds a " - "pointer!\n"); - } - if (sizeof(ee_u32) != 4) - { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD == SEED_ARG)) - ee_printf( - "ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD > 1) && (SEED_METHOD == SEED_ARG) - int nargs = *argc, i; - if ((nargs > 1) && (*argv[1] == 'M')) - { - default_num_contexts = parseval(argv[1] + 1); - if (default_num_contexts > MULTITHREAD) - default_num_contexts = MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not - * to coremark main */ - --nargs; - for (i = 1; i < nargs; i++) - argv[i] = argv[i + 1]; - *argc = nargs; - } -#endif /* sample of potential platform specific init via command line, reset \ - the number of contexts being used if first argument is M*/ - p->portable_id = 1; -} -/* Function: portable_fini - Target specific final code -*/ -void -portable_fini(core_portable *p) -{ - p->portable_id = 0; -} - -#if (MULTITHREAD > 1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork - and shared mem, and one using fork and sockets. Other implementations using - MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork - and shared mem, and one using fork and sockets. Other implementations using - MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 -core_start_parallel(core_results *res) -{ - return (ee_u8)pthread_create( - &(res->port.thread), NULL, iterate, (void *)res); -} -ee_u8 -core_stop_parallel(core_results *res) -{ - void *retval; - return (ee_u8)pthread_join(res->port.thread, &retval); -} -#elif USE_FORK -static int key_id = 0; -ee_u8 -core_start_parallel(core_results *res) -{ - key_t key = 4321 + key_id; - key_id++; - res->port.pid = fork(); - res->port.shmid = shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid < 0) - { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid == 0) - { - iterate(res); - res->port.shm = shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *)-1) - { - ee_printf("ERROR in child shmat!\n"); - } - else - { - memcpy(res->port.shm, &(res->crc), 8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 -core_stop_parallel(core_results *res) -{ - int status; - pid_t wpid = waitpid(res->port.pid, &status, WUNTRACED); - if (wpid != res->port.pid) - { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) - ee_printf("errno=No such child %d\n", res->port.pid); - if (errno == EINTR) - ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm = shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *)-1) - { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc), res->port.shm, 8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id = 0; -ee_u8 -core_start_parallel(core_results *res) -{ - int bound, buffer_length = 8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654 + key_id); - key_id++; - res->port.pid = fork(); - if (res->port.pid == 0) - { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, 
IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, exit */ - { - ee_printf("Error Creating Socket"); - } - else - { - int bytes_sent = sendto(res->port.sock, - &(res->crc), - buffer_length, - 0, - (struct sockaddr *)&(res->port.sa), - sizeof(struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock, - (struct sockaddr *)&(res->port.sa), - sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n", strerror(errno)); - return 1; -} -ee_u8 -core_stop_parallel(core_results *res) -{ - int status; - int fromlen = sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, - &(res->crc), - 8, - 0, - (struct sockaddr *)&(res->port.sa), - &fromlen); - if (recsize < 0) - { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid, &status, WUNTRACED); - if (wpid != res->port.pid) - { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) - ee_printf("errno=No such child %d\n", res->port.pid); - if (errno == EINTR) - ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error \ - "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/coremark/posix/core_portme.h b/benchmarks/riscv-coremark/coremark/posix/core_portme.h deleted file mode 100644 index e49e474b1..000000000 --- a/benchmarks/riscv-coremark/coremark/posix/core_portme.h +++ /dev/null @@ -1,314 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on - different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H - -#include "core_portme_posix_overrides.h" - -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf - function. 
-*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error \ - "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" -#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION -#ifdef __GNUC__ -#define COMPILER_VERSION "GCC"__VERSION__ -#else -#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" -#endif -#endif -#ifndef COMPILER_FLAGS -#define COMPILER_FLAGS \ - FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION -#define MEM_LOCATION \ - "Please put data memory location here\n\t\t\t(e.g. code in flash, data " \ - "on heap etc)" -#define MEM_LOCATION_UNSPEC 1 -#endif - -#include - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for - 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise - coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef uintptr_t ee_ptr_int; -typedef size_t ee_size_t; -/* align an offset to point to a 32b value */ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile - time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. 
- - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching - parallel contexts must be defined. - - Two sample implementations are provided. Use or - to enable them. - - It is valid to have a different implementation of - and in , to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value - greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value - greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value - greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. 
- - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value - expected, and use argc/argv in the to set this value from the - command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD > 1) -#if USE_PTHREAD -#include -#define PARALLEL_METHOD "PThreads" -#elif USE_FORK -#include -#include -#include -#include -#include /* for memcpy */ -#define PARALLEL_METHOD "Fork" -#elif USE_SOCKET -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#define PARALLEL_METHOD "Sockets" -#else -#define PARALLEL_METHOD "Proprietary" -#error \ - "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S -{ -#if (MULTITHREAD > 1) -#if USE_PTHREAD - pthread_t thread; -#elif USE_FORK - pid_t pid; - int shmid; - void *shm; -#elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; -#endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD == SEED_VOLATILE) -#if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) -#define RUN_TYPE_FLAG 1 -#else -#if (TOTAL_DATA_SIZE == 1200) -#define PROFILE_RUN 1 -#else -#define PERFORMANCE_RUN 1 -#endif -#endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/posix/core_portme.mak b/benchmarks/riscv-coremark/coremark/posix/core_portme.mak deleted file mode 100755 index e6be71a7e..000000000 --- a/benchmarks/riscv-coremark/coremark/posix/core_portme.mak +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -#File: core_portme.mak - -# Flag: OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag: CC -# Use this flag to define compiler to use -CC?= cc -# Flag: CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O2 -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -Iposix -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -# Flag: NO_LIBRT -# Define if the platform does not provide a librt -ifndef NO_LIBRT -#Flag: LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += -lrt -endif -# Flag: PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = posix/core_portme.c -vpath %.c posix -vpath %.h posix -vpath %.mak posix -# Flag: EXTRA_DEPENDS -# Port specific extra build dependencies. -# Some ports inherit from us, so ensure this Makefile is always a dependency. -EXTRA_DEPENDS += posix/core_portme.mak -# Flag: LOAD -# Define this flag if you need to load to a target, as in a cross compile environment. - -# Flag: RUN -# Define this flag if running does not consist of simple invocation of the binary. -# In a cross compile environment, you need to define this. - -#For flashing and using a tera term macro, you could use -#LOAD = flash ADDR -#RUN = ttpmacro coremark.ttl - -#For copying to target and executing via SSH connection, you could use -#LOAD = scp $(OUTFILE) user@target:~ -#RUN = ssh user@target -c - -#For native compilation and execution -LOAD = echo Loading done -RUN = - -OEXT = .o -EXE = .exe - -# Flag: SEPARATE_COMPILE -# Define if you need to separate compilation from link stage. 
-# In this case, you also need to define below how to create an object file, and how to link. -ifdef SEPARATE_COMPILE - -LD = gcc -OBJOUT = -o -LFLAGS = -OFLAG = -o -COUT = -c -# Flag: PORT_OBJS -# Port specific object files can be added here -PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT) -PORT_CLEAN = *$(OEXT) - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -endif - -# Target: port_prebuild -# Generate any files that are needed before actual build starts. -# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1 -# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line. -# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it. -# Note - Using REBUILD=1 -# -# Use make PGO=1 to invoke this sample processing. - -ifdef PGO - ifeq (,$(findstring $(PGO),gen)) - PGO_STAGE=build_pgo_gcc - CFLAGS+=-fprofile-use - endif - PORT_CLEAN+=*.gcda *.gcno gmon.out -endif - -.PHONY: port_prebuild -port_prebuild: $(PGO_STAGE) - -.PHONY: build_pgo_gcc -build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 - -# Target: port_postbuild -# Generate any files that are needed after actual build end. -# E.g. change format to srec, bin, zip in order to be able to load into flash -.PHONY: port_postbuild -port_postbuild: - -# Target: port_postrun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_postrun -port_postrun: - -# Target: port_prerun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_prerun -port_prerun: - -# Target: port_postload -# Do platform specific after load stuff. -# E.g. 
reset the reset power to the flash eraser -.PHONY: port_postload -port_postload: - -# Target: port_preload -# Do platform specific before load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_preload -port_preload: - -# FLAG: OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - -# FLAG: PERL -# Define perl executable to calculate the geomean if running separate. -PERL=/usr/bin/perl diff --git a/benchmarks/riscv-coremark/coremark/posix/core_portme_posix_overrides.h b/benchmarks/riscv-coremark/coremark/posix/core_portme_posix_overrides.h deleted file mode 100644 index c0e998adf..000000000 --- a/benchmarks/riscv-coremark/coremark/posix/core_portme_posix_overrides.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains additional configuration constants required to execute on - different platforms over and above the POSIX defaults -*/ -#ifndef CORE_PORTME_POSIX_OVERRIDES_H -#define CORE_PORTME_POSIX_OVERRIDES_H - -/* None by default */ - -#endif diff --git a/benchmarks/riscv-coremark/coremark/rtems/core_portme.mak b/benchmarks/riscv-coremark/coremark/rtems/core_portme.mak deleted file mode 100644 index 6b27c3c41..000000000 --- a/benchmarks/riscv-coremark/coremark/rtems/core_portme.mak +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -NO_LIBRT = 1 -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/coremark/rtems/init.c b/benchmarks/riscv-coremark/coremark/rtems/init.c deleted file mode 100644 index 64d3e59ae..000000000 --- a/benchmarks/riscv-coremark/coremark/rtems/init.c +++ /dev/null @@ -1,63 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2021 Hesham Almatary - * - * This software was developed by SRI International and the University of - * Cambridge Computer Laboratory (Department of Computer Science and - * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the - * DARPA SSITH research programme. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include - -int main( - int argc, - void **args -); - -rtems_task Init( - rtems_task_argument ignored -); - -rtems_task Init( - rtems_task_argument ignored -) -{ - int ret = main(0, NULL); - exit(ret); -} - -/* configuration information */ -#define CONFIGURE_APPLICATION_NEEDS_SIMPLE_CONSOLE_DRIVER -#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER - -#define CONFIGURE_MAXIMUM_TASKS 20 - -#define CONFIGURE_RTEMS_INIT_TASKS_TABLE - -#define CONFIGURE_INIT - -#include diff --git a/benchmarks/riscv-coremark/coremark/simple/core_portme.c b/benchmarks/riscv-coremark/coremark/simple/core_portme.c deleted file mode 100644 index b95e3b21e..000000000 --- a/benchmarks/riscv-coremark/coremark/simple/core_portme.c +++ /dev/null @@ -1,149 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" - -#if VALIDATION_RUN -volatile ee_s32 seed1_volatile = 0x3415; -volatile ee_s32 seed2_volatile = 0x3415; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PERFORMANCE_RUN -volatile ee_s32 seed1_volatile = 0x0; -volatile ee_s32 seed2_volatile = 0x0; -volatile ee_s32 seed3_volatile = 0x66; -#endif -#if PROFILE_RUN -volatile ee_s32 seed1_volatile = 0x8; -volatile ee_s32 seed2_volatile = 0x8; -volatile ee_s32 seed3_volatile = 0x8; -#endif -volatile ee_s32 seed4_volatile = ITERATIONS; -volatile ee_s32 seed5_volatile = 0; -/* Porting : Timing functions - How to capture time and convert to seconds must be ported to whatever is - supported by the platform. e.g. Read value from on board RTC, read value from - cpu clock cycles performance counter etc. Sample implementation for standard - time.h and windows.h definitions included. -*/ -/* Define : TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be - measured. - - Use lower values to increase resolution, but make sure that overflow - does not occur. If there are issues with the return value overflowing, - increase this value. - */ -#define NSECS_PER_SEC CLOCKS_PER_SEC -#define CORETIMETYPE clock_t -#define GETMYTIME(_t) (*_t = clock()) -#define MYTIMEDIFF(fin, ini) ((fin) - (ini)) -#define TIMER_RES_DIVIDER 1 -#define SAMPLE_TIME_IMPLEMENTATION 1 -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function : start_time - This function will be called right before starting the timed portion of - the benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or zeroing some system parameters - e.g. setting the cpu clocks - cycles to 0. 
-*/ -void -start_time(void) -{ - GETMYTIME(&start_time_val); -} -/* Function : stop_time - This function will be called right after ending the timed portion of the - benchmark. - - Implementation may be capturing a system timer (as implemented in the - example code) or other system parameters - e.g. reading the current value of - cpu cycles counter. -*/ -void -stop_time(void) -{ - GETMYTIME(&stop_time_val); -} -/* Function : get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other - value, as long as it can be converted to seconds by . This - methodology is taken to accomodate any hardware or simulated platform. The - sample implementation returns millisecs by default, and the resolution is - controlled by -*/ -CORE_TICKS -get_time(void) -{ - CORE_TICKS elapsed - = (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function : time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for - floating point. Default implementation implemented by the EE_TICKS_PER_SEC - macro above. -*/ -secs_ret -time_in_secs(CORE_TICKS ticks) -{ - secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} - -ee_u32 default_num_contexts = 1; - -/* Function : portable_init - Target specific initialization code - Test for some common mistakes. -*/ -void -portable_init(core_portable *p, int *argc, char *argv[]) -{ - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) - { - ee_printf( - "ERROR! Please define ee_ptr_int to a type that holds a " - "pointer!\n"); - } - if (sizeof(ee_u32) != 4) - { - ee_printf("ERROR! 
Please define ee_u32 to a 32b unsigned type!\n"); - } - p->portable_id = 1; -} -/* Function : portable_fini - Target specific final code -*/ -void -portable_fini(core_portable *p) -{ - p->portable_id = 0; -} diff --git a/benchmarks/riscv-coremark/coremark/simple/core_portme.h b/benchmarks/riscv-coremark/coremark/simple/core_portme.h deleted file mode 100644 index dfd94cbfc..000000000 --- a/benchmarks/riscv-coremark/coremark/simple/core_portme.h +++ /dev/null @@ -1,208 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic : Description - This file contains configuration constants required to execute on - different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration : HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration : HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration : USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 1 -#endif -/* Configuration : HAS_STDIO - Define to 1 if the platform has stdio.h. 
-*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration : HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf - function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration : CORE_TICKS - Define type of return from the timing functions. - */ -#include -typedef clock_t CORE_TICKS; - -/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION -#ifdef __GNUC__ -#define COMPILER_VERSION "GCC"__VERSION__ -#else -#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" -#endif -#endif -#ifndef COMPILER_FLAGS -#define COMPILER_FLAGS \ - FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION -#define MEM_LOCATION "STACK" -#endif - -/* Data Types : - To avoid compiler issues, define the data types that need ot be used for - 8b, 16b and 32b in . - - *Imprtant* : - ee_ptr_int needs to be the data type used to hold pointers, otherwise - coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -/* align_mem : - This macro is used to align an offset to point to a 32b value. It is - used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) - -/* Configuration : SEED_METHOD - Defines method to get seed values that cannot be computed at compile - time. - - Valid values : - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_VOLATILE -#endif - -/* Configuration : MEM_METHOD - Defines method to get a block of memry. - - Valid values : - MEM_MALLOC - for platforms that implement malloc and have malloc.h. 
- MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_STACK -#endif - -/* Configuration : MULTITHREAD - Define for parallel execution - - Valid values : - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note : - If this flag is defined to more then 1, an implementation for launching - parallel contexts must be defined. - - Two sample implementations are provided. Use or - to enable them. - - It is valid to have a different implementation of - and in , to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#define USE_PTHREAD 0 -#define USE_FORK 0 -#define USE_SOCKET 0 -#endif - -/* Configuration : MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values : - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported - - Note : - This flag only matters if MULTITHREAD has been defined to a value - greater then 1. -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration : MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values : - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable : default_num_contexts - Not used for this simple port, must cintain the value 1. 
-*/ -extern ee_u32 default_num_contexts; - -typedef struct CORE_PORTABLE_S -{ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \ - && !defined(VALIDATION_RUN) -#if (TOTAL_DATA_SIZE == 1200) -#define PROFILE_RUN 1 -#elif (TOTAL_DATA_SIZE == 2000) -#define PERFORMANCE_RUN 1 -#else -#define VALIDATION_RUN 1 -#endif -#endif - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/coremark/simple/core_portme.mak b/benchmarks/riscv-coremark/coremark/simple/core_portme.mak deleted file mode 100755 index 61c3db683..000000000 --- a/benchmarks/riscv-coremark/coremark/simple/core_portme.mak +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -#File : core_portme.mak - -# Flag : OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag : CC -# Use this flag to define compiler to use -CC = gcc -# Flag : CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O2 -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. 
-DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag : LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note : On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END = -# Flag : PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = $(PORT_DIR)/core_portme.c -# Flag : LOAD -# For a simple port, we assume self hosted compile and run, no load needed. - -# Flag : RUN -# For a simple port, we assume self hosted compile and run, simple invocation of the executable - -#For native compilation and execution -LOAD = echo Loading done -RUN = - -OEXT = .o -EXE = .exe - -# Target : port_pre% and port_post% -# For the purpose of this simple port, no pre or post steps needed. - -.PHONY : port_prebuild port_postbuild port_prerun port_postrun port_preload port_postload -port_pre% port_post% : - -# FLAG : OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - diff --git a/benchmarks/riscv-coremark/old/README.md b/benchmarks/riscv-coremark/old/README.md deleted file mode 100644 index 7c02eb1bf..000000000 --- a/benchmarks/riscv-coremark/old/README.md +++ /dev/null @@ -1,23 +0,0 @@ -Coremark EEMBC Wrapper -====================== - -This repository provides the utility files to port [CoreMark EEMBC](https://www.eembc.org/coremark/) to RISC-V. - -### Requirements - - - You must have installed the RISC-V tools - -### Setup - - - `git submodule update --init` - - Run the `./build-coremark.sh` script that does the following - - Builds a version of Coremark for Linux or pk (coremark.riscv) - - Builds a version of Coremark for bare-metal (coremark.bare.riscv) - - Copies the output binaries into this directory - -### Default Files - -The default files target **RV64GC** and use minimal amount of compilation flags. 
Additionally, the `*.mak` file in the `riscv64` -folder setups `spike pk` as the default `run` rule. - -Feel free to change these to suit your needs. diff --git a/benchmarks/riscv-coremark/old/extraPortmes/README.md b/benchmarks/riscv-coremark/old/extraPortmes/README.md deleted file mode 100644 index 681fc4d8b..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/README.md +++ /dev/null @@ -1,7 +0,0 @@ -This directory is a backup for the portme files associated with cygwin, linux, and linux64 - -This backup is needed in the event that a user replaces the coremark directory with a clean version -from EEMBC's github page (the clean version does not have the cygwin, linux, -and linux64 files that our version does). - -Please do not delete this directory under any circumstance. \ No newline at end of file diff --git a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.c b/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.c deleted file mode 100755 index fe8d29983..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.c +++ /dev/null @@ -1,336 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. 
-*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. -*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.h b/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.h deleted file mode 100755 index 9471b12ec..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.h +++ /dev/null @@ -1,293 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" 
-#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -/* align_mem: - This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). 
-*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. 
- - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.mak b/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.mak deleted file mode 100644 index 97b6d6ace..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/cygwin/core_portme.mak +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.c b/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.c deleted file mode 100755 index 6b63610d1..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.c +++ /dev/null @@ -1,338 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. 
-*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - { - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.h b/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.h deleted file mode 100755 index 2cf4659a4..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.h +++ /dev/null @@ -1,290 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" -#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. 
gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef ee_u32 ee_ptr_int; -typedef size_t ee_size_t; -/* align_mem: - This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. -*/ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. 
- - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. 
- - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.mak b/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.mak deleted file mode 100644 index 97b6d6ace..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux/core_portme.mak +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the 
Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Original Author: Shay Gal-on - -include posix/core_portme.mak diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.c b/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.c deleted file mode 100755 index fe8d29983..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.c +++ /dev/null @@ -1,336 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. 
-*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - #define SAMPLE_TIME_IMPLEMENTATION 0 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. */ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. 
-*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. 
-*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.h b/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.h deleted file mode 100755 index 1228a679b..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.h +++ /dev/null @@ -1,291 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 1 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. - */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -#error "Please define type of CORE_TICKS and implement start_time, end_time get_time and time_in_secs functions!" 
-#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; -typedef size_t ee_size_t; -/* align an offset to point to a 32b value */ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. - - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. 
- - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. - - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. 
- 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.mak b/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.mak deleted file mode 100755 index 5cfabee32..000000000 --- a/benchmarks/riscv-coremark/old/extraPortmes/linux64/core_portme.mak +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -#File: core_portme.mak - -# Flag: OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag: CC -# Use this flag to define compiler to use -CC = gcc -# Flag: CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -PORT_CFLAGS = -O2 -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag: LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += -lrt -# Flag: PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = $(PORT_DIR)/core_portme.c -# Flag: LOAD -# Define this flag if you need to load to a target, as in a cross compile environment. - -# Flag: RUN -# Define this flag if running does not consist of simple invocation of the binary. -# In a cross compile environment, you need to define this. - -#For flashing and using a tera term macro, you could use -#LOAD = flash ADDR -#RUN = ttpmacro coremark.ttl - -#For copying to target and executing via SSH connection, you could use -#LOAD = scp $(OUTFILE) user@target:~ -#RUN = ssh user@target -c - -#For native compilation and execution -LOAD = echo Loading done -RUN = - -OEXT = .o -EXE = .exe - -# Flag: SEPARATE_COMPILE -# Define if you need to separate compilation from link stage. -# In this case, you also need to define below how to create an object file, and how to link. 
-ifdef SEPARATE_COMPILE - -LD = gcc -OBJOUT = -o -LFLAGS = -OFLAG = -o -COUT = -c -# Flag: PORT_OBJS -# Port specific object files can be added here -PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT) -PORT_CLEAN = *$(OEXT) - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -endif - -# Target: port_prebuild -# Generate any files that are needed before actual build starts. -# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1 -# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line. -# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it. -# Note - Using REBUILD=1 -# -# Use make PGO=1 to invoke this sample processing. - -ifdef PGO - ifeq (,$(findstring $(PGO),gen)) - PGO_STAGE=build_pgo_gcc - CFLAGS+=-fprofile-use - endif - PORT_CLEAN+=*.gcda *.gcno gmon.out -endif - -.PHONY: port_prebuild -port_prebuild: $(PGO_STAGE) - -.PHONY: build_pgo_gcc -build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 - -# Target: port_postbuild -# Generate any files that are needed after actual build end. -# E.g. change format to srec, bin, zip in order to be able to load into flash -.PHONY: port_postbuild -port_postbuild: - -# Target: port_postrun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_postrun -port_postrun: - -# Target: port_prerun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_prerun -port_prerun: - -# Target: port_postload -# Do platform specific after load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_postload -port_postload: - -# Target: port_preload -# Do platform specific before load stuff. -# E.g. 
reset the reset power to the flash eraser -.PHONY: port_preload -port_preload: - -# FLAG: OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - -# FLAG: PERL -# Define perl executable to calculate the geomean if running separate. -PERL=/usr/bin/perl diff --git a/benchmarks/riscv-coremark/old/riscv64/core_portme.c b/benchmarks/riscv-coremark/old/riscv64/core_portme.c deleted file mode 100755 index 8f17cb8bd..000000000 --- a/benchmarks/riscv-coremark/old/riscv64/core_portme.c +++ /dev/null @@ -1,346 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. 
-*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - // Defined for RISCV - #define NSECS_PER_SEC 1000000000 // TODO: What freq are we assuming? - #define EE_TIMER_TICKER_RATE 1000 // TODO: What is this? - #define CORETIMETYPE clock_t - #define read_csr(reg) ({ unsigned long __tmp; \ - asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ - __tmp; }) - #define GETMYTIME(_t) (*_t=read_csr(cycle)) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. 
*/ -static CORETIMETYPE start_time_val, stop_time_val; - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. -*/ -void start_time(void) { - GETMYTIME(&start_time_val ); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(&stop_time_val ); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. - - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. 
-*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts=MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. -*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. 
- Other implementations using MCAPI or other standards can easily be devised. -*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, 
IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/old/riscv64/core_portme.h b/benchmarks/riscv-coremark/old/riscv64/core_portme.h deleted file mode 100755 index 4e28afd36..000000000 --- a/benchmarks/riscv-coremark/old/riscv64/core_portme.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 0 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. 
- */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -/* Configuration: size_t and clock_t - Note these need to match the size of the clock output and the xLen the processor supports - */ -typedef unsigned long int size_t; -typedef unsigned long int clock_t; -typedef clock_t CORE_TICKS; -#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; -typedef size_t ee_size_t; -/* align an offset to point to a 32b value */ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_ARG -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. 
- - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_MALLOC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. 
- - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 0 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/old/riscv64/core_portme.mak b/benchmarks/riscv-coremark/old/riscv64/core_portme.mak deleted file mode 100755 index edc341abc..000000000 --- a/benchmarks/riscv-coremark/old/riscv64/core_portme.mak +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -#File: core_portme.mak - -# Flag: RISCVTOOLS -# Use this flag to point to your RISCV tools -RISCVTOOLS=$(RISCV) -# Flag: RISCVTYPE -# Type of toolchain to use -RISCVTYPE=riscv64-unknown-elf -# Flag: OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag: CC -# Use this flag to define compiler to use -CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc -# Flag: CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -#PORT_CFLAGS = -O2 -static -std=gnu99 -PORT_CFLAGS = -O2 -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag: LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += -# Flag: PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = $(PORT_DIR)/core_portme.c -# Flag: LOAD -# Define this flag if you need to load to a target, as in a cross compile environment. - -# Flag: RUN -# Define this flag if running does not consist of simple invocation of the binary. -# In a cross compile environment, you need to define this. - -#For flashing and using a tera term macro, you could use -#LOAD = flash ADDR -#RUN = ttpmacro coremark.ttl - -#For copying to target and executing via SSH connection, you could use -#LOAD = scp $(OUTFILE) user@target:~ -#RUN = ssh user@target -c - -#For native compilation and execution -LOAD = echo Loading done -RUN = spike pk - -OEXT = .o -EXE = .riscv - -# Flag: SEPARATE_COMPILE -# Define if you need to separate compilation from link stage. -# In this case, you also need to define below how to create an object file, and how to link. 
-ifdef SEPARATE_COMPILE - -LD = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc -OBJOUT = -o -LFLAGS = -OFLAG = -o -COUT = -c -# Flag: PORT_OBJS -# Port specific object files can be added here -PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT) -PORT_CLEAN = *$(OEXT) - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -endif - -# Target: port_prebuild -# Generate any files that are needed before actual build starts. -# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1 -# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line. -# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it. -# Note - Using REBUILD=1 -# -# Use make PGO=1 to invoke this sample processing. - -ifdef PGO - ifeq (,$(findstring $(PGO),gen)) - PGO_STAGE=build_pgo_gcc - CFLAGS+=-fprofile-use - endif - PORT_CLEAN+=*.gcda *.gcno gmon.out -endif - -.PHONY: port_prebuild -port_prebuild: $(PGO_STAGE) - -.PHONY: build_pgo_gcc -build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 - -# Target: port_postbuild -# Generate any files that are needed after actual build end. -# E.g. change format to srec, bin, zip in order to be able to load into flash -.PHONY: port_postbuild -port_postbuild: - -# Target: port_postrun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_postrun -port_postrun: - -# Target: port_prerun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_prerun -port_prerun: - -# Target: port_postload -# Do platform specific after load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_postload -port_postload: - -# Target: port_preload -# Do platform specific before load stuff. -# E.g. 
reset the reset power to the flash eraser -.PHONY: port_preload -port_preload: - -# FLAG: OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - -# FLAG: PERL -# Define perl executable to calculate the geomean if running separate. -PERL=/usr/bin/perl diff --git a/benchmarks/riscv-coremark/old/trace b/benchmarks/riscv-coremark/old/trace deleted file mode 100644 index 7c76d0bfa..000000000 --- a/benchmarks/riscv-coremark/old/trace +++ /dev/null @@ -1,48 +0,0 @@ -Imperas riscvOVPsimPlus - - -riscvOVPsimPlus (64-Bit) v20210329.0 Open Virtual Platform simulator from www.IMPERAS.com. -Copyright (c) 2005-2021 Imperas Software Ltd. Contains Imperas Proprietary Information. -Licensed Software, All Rights Reserved. -Visit www.IMPERAS.com for multicore debug, verification and analysis solutions. - -riscvOVPsimPlus started: Wed May 12 17:55:33 2021 - - -Info (GDBT_PORT) Host: Tera.Eng.HMC.Edu, Port: 55460 -Info (DBC_LGDB) Starting Debugger /cad/riscv/imperas-riscv-tests/riscv-ovpsim-plus/bin/Linux64/riscv-none-embed-gdb -Info (GDBT_WAIT) Waiting for remote debugger to connect... 
-Info (OR_OF) Target 'riscvOVPsim/cpu' has object file read from 'coremark.bare.riscv' -Info (OR_PH) Program Headers: -Info (OR_PH) Type Offset VirtAddr PhysAddr -Info (OR_PH) FileSiz MemSiz Flags Align -Info (OR_PD) LOAD 0x0000000000001000 0x0000000080000000 0x0000000080000000 -Info (OR_PD) 0x0000000000000204 0x0000000000000204 R-E 1000 -Info (OR_PD) LOAD 0x0000000000002000 0x0000000080001000 0x0000000080001000 -Info (OR_PD) 0x00000000000047e0 0x0000000000004ff0 RWE 1000 -Info (GDBT_CONNECTED) Client connected -Info (GDBT_GON) Client disappeared 'riscvOVPsim/cpu' -Info -Info --------------------------------------------------- -Info CPU 'riscvOVPsim/cpu' STATISTICS -Info Type : riscv (RV64GC) -Info Nominal MIPS : 100 -Info Final program counter : 0x80003558 -Info Simulated instructions: 1,455,608 -Info Simulated MIPS : 0.0 -Info --------------------------------------------------- -Info -Info --------------------------------------------------- -Info SIMULATION TIME STATISTICS -Info Simulated time : 0.02 seconds -Info User time : 99.23 seconds -Info System time : 254.08 seconds -Info Elapsed time : 1107.49 seconds -Info --------------------------------------------------- - -riscvOVPsimPlus finished: Wed May 12 18:14:04 2021 - - -riscvOVPsimPlus (64-Bit) v20210329.0 Open Virtual Platform simulator from www.IMPERAS.com. -Visit www.IMPERAS.com for multicore debug, verification and analysis solutions. - diff --git a/benchmarks/riscv-coremark/old/transferobjdump.sh b/benchmarks/riscv-coremark/old/transferobjdump.sh deleted file mode 100755 index 69578566a..000000000 --- a/benchmarks/riscv-coremark/old/transferobjdump.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash -./build-coremark.sh - -riscv64-unknown-elf-objdump -D coremark.bare.riscv > coremarkcodemod.bare.riscv.objdump -cp coremarkcodemod.bare.riscv.objdump ~/riscv-wally/tests/imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/. 
-pushd ~/riscv-wally/tests/imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark -./exe2memfile.pl coremarkcodemod.bare.riscv -popd diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.c b/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.c deleted file mode 100755 index 86917f939..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.c +++ /dev/null @@ -1,382 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -#include -#include -#include "coremark.h" -#if CALLGRIND_RUN -#include -#endif - -#if (MEM_METHOD==MEM_MALLOC) -#include -/* Function: portable_malloc - Provide malloc() functionality in a platform specific way. -*/ -void *portable_malloc(size_t size) { - return malloc(size); -} -/* Function: portable_free - Provide free() functionality in a platform specific way. 
-*/ -void portable_free(void *p) { - free(p); -} -#else -void *portable_malloc(size_t size) { - return NULL; -} -void portable_free(void *p) { - p=NULL; -} -#endif - -#if (SEED_METHOD==SEED_VOLATILE) -#if VALIDATION_RUN - volatile ee_s32 seed1_volatile=0x3415; - volatile ee_s32 seed2_volatile=0x3415; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PERFORMANCE_RUN - volatile ee_s32 seed1_volatile=0x0; - volatile ee_s32 seed2_volatile=0x0; - volatile ee_s32 seed3_volatile=0x66; -#endif -#if PROFILE_RUN - volatile ee_s32 seed1_volatile=0x8; - volatile ee_s32 seed2_volatile=0x8; - volatile ee_s32 seed3_volatile=0x8; -#endif - volatile ee_s32 seed4_volatile=ITERATIONS; - volatile ee_s32 seed5_volatile=0; -#endif -/* Porting: Timing functions - How to capture time and convert to seconds must be ported to whatever is supported by the platform. - e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. - Sample implementation for standard time.h and windows.h definitions included. -*/ -/* Define: TIMER_RES_DIVIDER - Divider to trade off timer resolution and total time that can be measured. - - Use lower values to increase resolution, but make sure that overflow does not occur. - If there are issues with the return value overflowing, increase this value. 
- */ -#if USE_CLOCK - #define NSECS_PER_SEC CLOCKS_PER_SEC - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE clock_t - #define GETMYTIME(_t) (*_t=clock()) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - #define TIMER_RES_DIVIDER 1 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif defined(_MSC_VER) - #define NSECS_PER_SEC 10000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE FILETIME - #define GETMYTIME(_t) GetSystemTimeAsFileTime(_t) - #define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER) - /* setting to millisces resolution by default with MSDEV */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#elif HAS_TIME_H - #define NSECS_PER_SEC 1000000000 - #define EE_TIMER_TICKER_RATE 1000 - #define CORETIMETYPE struct timespec - #define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t) - #define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER) - /* setting to 1/1000 of a second resolution by default with linux */ - #ifndef TIMER_RES_DIVIDER - #define TIMER_RES_DIVIDER 1000000 - #endif - #define SAMPLE_TIME_IMPLEMENTATION 1 -#else - // Defined for RISCV - #define NSECS_PER_SEC 1000000000 // TODO: What freq are we assuming? - #define EE_TIMER_TICKER_RATE 1000 // TODO: What is this? - #define CORETIMETYPE clock_t - #define read_csr(reg) ({ unsigned long __tmp; \ - asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ - __tmp; }) - #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) - #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) - // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) - #define TIMER_RES_DIVIDER 10000 - #define SAMPLE_TIME_IMPLEMENTATION 1 -#endif -#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) - -#if SAMPLE_TIME_IMPLEMENTATION -/** Define Host specific (POSIX), or target specific global time variables. 
*/ -static CORETIMETYPE start_time_val, stop_time_val; -static unsigned long start_instr_val, stop_instr_val; - -/* Function: minstretFunc - This function will count the number of instructions. -*/ -unsigned long minstretFunc(void) -{ - unsigned long minstretRead = read_csr(minstret); - //ee_printf("Minstret is %lu\n", minstretRead); - return minstretRead; -} - -/* Function: minstretDiff - This function will take the difference between the first and second reads from the - MINSTRET csr to determine the number of machine instructions retired between two points - of time -*/ -unsigned long minstretDiff(void) -{ - unsigned long minstretDifference = MYTIMEDIFF(stop_instr_val, start_instr_val); - return minstretDifference; -} - -/* Function: start_time - This function will be called right before starting the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. -*/ -void start_time(void) { - start_instr_val = minstretFunc(); - GETMYTIME(start_time_val); - //ee_printf("Timer started\n"); - //ee_printf(" MTIME: %u\n", start_time_val); -#if CALLGRIND_RUN - CALLGRIND_START_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif -} -/* Function: stop_time - This function will be called right after ending the timed portion of the benchmark. - - Implementation may be capturing a system timer (as implemented in the example code) - or other system parameters - e.g. reading the current value of cpu cycles counter. -*/ -void stop_time(void) { -#if CALLGRIND_RUN - CALLGRIND_STOP_INSTRUMENTATION -#endif -#if MICA - asm volatile("int3");/*1 */ -#endif - GETMYTIME(stop_time_val); - stop_instr_val = minstretFunc(); - //ee_printf("Timer stopped\n"); - //ee_printf(" MTIME: %u\n", stop_time_val); -} -/* Function: get_time - Return an abstract "ticks" number that signifies time on the system. 
- - Actual value returned may be cpu cycles, milliseconds or any other value, - as long as it can be converted to seconds by . - This methodology is taken to accomodate any hardware or simulated platform. - The sample implementation returns millisecs by default, - and the resolution is controlled by -*/ -CORE_TICKS get_time(void) { - CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); - unsigned long instructions = minstretDiff(); - ee_printf(" Called get_time\n"); - ee_printf(" Elapsed MTIME: %u\n", elapsed); - ee_printf(" Elapsed MINSTRET: %lu\n", instructions); - ee_printf(" CPI: %lu / %lu\n", elapsed, instructions); - return elapsed; -} -/* Function: time_in_secs - Convert the value returned by get_time to seconds. - - The type is used to accomodate systems with no support for floating point. - Default implementation implemented by the EE_TICKS_PER_SEC macro above. -*/ -secs_ret time_in_secs(CORE_TICKS ticks) { - secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; - int retvalint = (int)retval; - //ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint); - return retval; -} -#else -#error "Please implement timing functionality in core_portme.c" -#endif /* SAMPLE_TIME_IMPLEMENTATION */ - -ee_u32 default_num_contexts = MULTITHREAD; - -/* Function: portable_init - Target specific initialization code - Test for some common mistakes. -*/ -void portable_init(core_portable *p, int *argc, char *argv[]) -{ -#if PRINT_ARGS - int i; - for (i=0; i<*argc; i++) { - ee_printf("Arg[%d]=%s\n",i,argv[i]); - } -#endif - if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { - ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); - } - if (sizeof(ee_u32) != 4) { - ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); - } -#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG)) - ee_printf("ERROR! 
Main has no argc, but SEED_METHOD defined to SEED_ARG!\n"); -#endif - -#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG) - int nargs=*argc,i; - if ((nargs>1) && (*argv[1]=='M')) { - default_num_contexts=parseval(argv[1]+1); - if (default_num_contexts>MULTITHREAD) - default_num_contexts=MULTITHREAD; - /* Shift args since first arg is directed to the portable part and not to coremark main */ - --nargs; - for (i=1; i*/ - p->portable_id=1; -} -/* Function: portable_fini - Target specific final code -*/ -void portable_fini(core_portable *p) -{ - p->portable_id=0; -} - -#if (MULTITHREAD>1) - -/* Function: core_start_parallel - Start benchmarking in a parallel context. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. -*/ -/* Function: core_stop_parallel - Stop a parallel context execution of coremark, and gather the results. - - Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets. - Other implementations using MCAPI or other standards can easily be devised. 
-*/ -#if USE_PTHREAD -ee_u8 core_start_parallel(core_results *res) { - return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res); -} -ee_u8 core_stop_parallel(core_results *res) { - void *retval; - return (ee_u8)pthread_join(res->port.thread,&retval); -} -#elif USE_FORK -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - key_t key=4321+key_id; - key_id++; - res->port.pid=fork(); - res->port.shmid=shmget(key, 8, IPC_CREAT | 0666); - if (res->port.shmid<0) { - ee_printf("ERROR in shmget!\n"); - } - if (res->port.pid==0) { - iterate(res); - res->port.shm=shmat(res->port.shmid, NULL, 0); - /* copy the validation values to the shared memory area and quit*/ - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in child shmat!\n"); - } else { - memcpy(res->port.shm,&(res->crc),8); - shmdt(res->port.shm); - } - exit(0); - } - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - /* after process is done, get the values from the shared memory area */ - res->port.shm=shmat(res->port.shmid, NULL, 0); - if (res->port.shm == (char *) -1) { - ee_printf("ERROR in parent shmat!\n"); - return 0; - } - memcpy(&(res->crc),res->port.shm,8); - shmdt(res->port.shm); - return 1; -} -#elif USE_SOCKET -static int key_id=0; -ee_u8 core_start_parallel(core_results *res) { - int bound, buffer_length=8; - res->port.sa.sin_family = AF_INET; - res->port.sa.sin_addr.s_addr = htonl(0x7F000001); - res->port.sa.sin_port = htons(7654+key_id); - key_id++; - res->port.pid=fork(); - if (res->port.pid==0) { /* benchmark child */ - iterate(res); - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (-1 == res->port.sock) /* if socket failed to initialize, 
exit */ { - ee_printf("Error Creating Socket"); - } else { - int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in)); - if (bytes_sent < 0) - ee_printf("Error sending packet: %s\n", strerror(errno)); - close(res->port.sock); /* close the socket */ - } - exit(0); - } - /* parent process, open the socket */ - res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); - bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr)); - if (bound < 0) - ee_printf("bind(): %s\n",strerror(errno)); - return 1; -} -ee_u8 core_stop_parallel(core_results *res) { - int status; - int fromlen=sizeof(struct sockaddr); - int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen); - if (recsize < 0) { - ee_printf("Error in receive: %s\n", strerror(errno)); - return 0; - } - pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED); - if (wpid != res->port.pid) { - ee_printf("ERROR waiting for child.\n"); - if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid); - if (errno == EINTR) ee_printf("errno=Interrupted\n"); - return 0; - } - return 1; -} -#else /* no standard multicore implementation */ -#error "Please implement multicore functionality in core_portme.c to use multiple contexts." -#endif /* multithread implementations */ -#endif diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.h b/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.h deleted file mode 100755 index ef26e88ad..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Original Author: Shay Gal-on -*/ - -/* Topic: Description - This file contains configuration constants required to execute on different platforms -*/ -#ifndef CORE_PORTME_H -#define CORE_PORTME_H -/************************/ -/* Data types and settings */ -/************************/ -/* Configuration: HAS_FLOAT - Define to 1 if the platform supports floating point. -*/ -#ifndef HAS_FLOAT -#define HAS_FLOAT 1 -#endif -/* Configuration: HAS_TIME_H - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef HAS_TIME_H -#define HAS_TIME_H 0 -#endif -/* Configuration: USE_CLOCK - Define to 1 if platform has the time.h header file, - and implementation of functions thereof. -*/ -#ifndef USE_CLOCK -#define USE_CLOCK 0 -#endif -/* Configuration: HAS_STDIO - Define to 1 if the platform has stdio.h. -*/ -#ifndef HAS_STDIO -#define HAS_STDIO 1 -#endif -/* Configuration: HAS_PRINTF - Define to 1 if the platform has stdio.h and implements the printf function. -*/ -#ifndef HAS_PRINTF -#define HAS_PRINTF 1 -#endif - -/* Configuration: CORE_TICKS - Define type of return from the timing functions. 
- */ -#if defined(_MSC_VER) -#include -typedef size_t CORE_TICKS; -#elif HAS_TIME_H -#include -typedef clock_t CORE_TICKS; -#else -/* Configuration: size_t and clock_t - Note these need to match the size of the clock output and the xLen the processor supports - */ -typedef unsigned long int size_t; -typedef unsigned long int clock_t; -typedef clock_t CORE_TICKS; -#endif - -/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION - Initialize these strings per platform -*/ -#ifndef COMPILER_VERSION - #ifdef __GNUC__ - #define COMPILER_VERSION "GCC"__VERSION__ - #else - #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" - #endif -#endif -#ifndef COMPILER_FLAGS - #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ -#endif -#ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" - #define MEM_LOCATION_UNSPEC 1 -#endif - -/* Data Types: - To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . - - *Imprtant*: - ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! -*/ -typedef signed short ee_s16; -typedef unsigned short ee_u16; -typedef signed int ee_s32; -typedef double ee_f32; -typedef unsigned char ee_u8; -typedef unsigned int ee_u32; -typedef unsigned long long ee_ptr_int; -typedef size_t ee_size_t; -/* align an offset to point to a 32b value */ -#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) - -/* Configuration: SEED_METHOD - Defines method to get seed values that cannot be computed at compile time. - - Valid values: - SEED_ARG - from command line. - SEED_FUNC - from a system function. - SEED_VOLATILE - from volatile variables. -*/ -#ifndef SEED_METHOD -#define SEED_METHOD SEED_VOLATILE -#endif - -/* Configuration: MEM_METHOD - Defines method to get a block of memry. 
- - Valid values: - MEM_MALLOC - for platforms that implement malloc and have malloc.h. - MEM_STATIC - to use a static memory array. - MEM_STACK - to allocate the data block on the stack (NYI). -*/ -#ifndef MEM_METHOD -#define MEM_METHOD MEM_STATIC -#endif - -/* Configuration: MULTITHREAD - Define for parallel execution - - Valid values: - 1 - only one context (default). - N>1 - will execute N copies in parallel. - - Note: - If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. - - Two sample implementations are provided. Use or to enable them. - - It is valid to have a different implementation of and in , - to fit a particular architecture. -*/ -#ifndef MULTITHREAD -#define MULTITHREAD 1 -#endif - -/* Configuration: USE_PTHREAD - Sample implementation for launching parallel contexts - This implementation uses pthread_thread_create and pthread_join. - - Valid values: - 0 - Do not use pthreads API. - 1 - Use pthreads API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_PTHREAD -#define USE_PTHREAD 0 -#endif - -/* Configuration: USE_FORK - Sample implementation for launching parallel contexts - This implementation uses fork, waitpid, shmget,shmat and shmdt. - - Valid values: - 0 - Do not use fork API. - 1 - Use fork API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_FORK -#define USE_FORK 0 -#endif - -/* Configuration: USE_SOCKET - Sample implementation for launching parallel contexts - This implementation uses fork, socket, sendto and recvfrom - - Valid values: - 0 - Do not use fork and sockets API. - 1 - Use fork and sockets API - - Note: - This flag only matters if MULTITHREAD has been defined to a value greater then 1. -*/ -#ifndef USE_SOCKET -#define USE_SOCKET 0 -#endif - -/* Configuration: MAIN_HAS_NOARGC - Needed if platform does not support getting arguments to main. 
- - Valid values: - 0 - argc/argv to main is supported - 1 - argc/argv to main is not supported -*/ -#ifndef MAIN_HAS_NOARGC -#define MAIN_HAS_NOARGC 1 -#endif - -/* Configuration: MAIN_HAS_NORETURN - Needed if platform does not support returning a value from main. - - Valid values: - 0 - main returns an int, and return value will be 0. - 1 - platform does not support returning a value from main -*/ -#ifndef MAIN_HAS_NORETURN -#define MAIN_HAS_NORETURN 0 -#endif - -/* Variable: default_num_contexts - Number of contexts to spawn in multicore context. - Override this global value to change number of contexts used. - - Note: - This value may not be set higher then the define. - - To experiment, you can set the define to the highest value expected, and use argc/argv in the to set this value from the command line. -*/ -extern ee_u32 default_num_contexts; - -#if (MULTITHREAD>1) -#if USE_PTHREAD - #include - #define PARALLEL_METHOD "PThreads" -#elif USE_FORK - #include - #include - #include - #include - #include /* for memcpy */ - #define PARALLEL_METHOD "Fork" -#elif USE_SOCKET - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #define PARALLEL_METHOD "Sockets" -#else - #define PARALLEL_METHOD "Proprietary" - #error "Please implement multicore functionality in core_portme.c to use multiple contexts." 
-#endif /* Method for multithreading */ -#endif /* MULTITHREAD > 1 */ - -typedef struct CORE_PORTABLE_S { -#if (MULTITHREAD>1) - #if USE_PTHREAD - pthread_t thread; - #elif USE_FORK - pid_t pid; - int shmid; - void *shm; - #elif USE_SOCKET - pid_t pid; - int sock; - struct sockaddr_in sa; - #endif /* Method for multithreading */ -#endif /* MULTITHREAD>1 */ - ee_u8 portable_id; -} core_portable; - -/* target specific init/fini */ -void portable_init(core_portable *p, int *argc, char *argv[]); -void portable_fini(core_portable *p); - -#if (SEED_METHOD==SEED_VOLATILE) - #if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN) - #define RUN_TYPE_FLAG 1 - #else - #if (TOTAL_DATA_SIZE==1200) - #define PROFILE_RUN 1 - #else - #define PERFORMANCE_RUN 1 - #endif - #endif -#endif /* SEED_METHOD==SEED_VOLATILE */ - -#endif /* CORE_PORTME_H */ diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.mak b/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.mak deleted file mode 100755 index 4bae943dc..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/core_portme.mak +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Original Author: Shay Gal-on - -#File: core_portme.mak - -# Flag: RISCVTOOLS -# Use this flag to point to your RISCV tools -RISCVTOOLS=$(RISCV) -# Flag: RISCVTYPE -# Type of toolchain to use -RISCVTYPE=riscv64-unknown-elf -# Flag: OUTFLAG -# Use this flag to define how to to get an executable (e.g -o) -OUTFLAG= -o -# Flag: CC -# Use this flag to define compiler to use -# david_harris@hmc.edu 20 Nov 2021 removed full path; require -CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc -#CC = $(RISCVTYPE)-gcc -# Flag: CFLAGS -# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags" -#PORT_CFLAGS = -O2 -static -std=gnu99 -PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld -FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)" -CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\" -#Flag: LFLAGS_END -# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts). -# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt. -LFLAGS_END += -# Flag: PORT_SRCS -# Port specific source files can be added here -PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S -# Flag: LOAD -# Define this flag if you need to load to a target, as in a cross compile environment. - -# Flag: RUN -# Define this flag if running does not consist of simple invocation of the binary. -# In a cross compile environment, you need to define this. 
- -#For flashing and using a tera term macro, you could use -#LOAD = flash ADDR -#RUN = ttpmacro coremark.ttl - -#For copying to target and executing via SSH connection, you could use -#LOAD = scp $(OUTFILE) user@target:~ -#RUN = ssh user@target -c - -#For native compilation and execution -LOAD = echo Loading done -RUN = spike pk - -OEXT = .o -EXE = .bare.riscv - -# Flag: SEPARATE_COMPILE -# Define if you need to separate compilation from link stage. -# In this case, you also need to define below how to create an object file, and how to link. -ifdef SEPARATE_COMPILE - -LD = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc -OBJOUT = -o -LFLAGS = -OFLAG = -o -COUT = -c -# Flag: PORT_OBJS -# Port specific object files can be added here -PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT) -PORT_CLEAN = *$(OEXT) - -$(OPATH)%$(OEXT) : %.c - $(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@ - -endif - -# Target: port_prebuild -# Generate any files that are needed before actual build starts. -# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1 -# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line. -# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it. -# Note - Using REBUILD=1 -# -# Use make PGO=1 to invoke this sample processing. - -ifdef PGO - ifeq (,$(findstring $(PGO),gen)) - PGO_STAGE=build_pgo_gcc - CFLAGS+=-fprofile-use - endif - PORT_CLEAN+=*.gcda *.gcno gmon.out -endif - -.PHONY: port_prebuild -port_prebuild: $(PGO_STAGE) - -.PHONY: build_pgo_gcc -build_pgo_gcc: - $(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1 - -# Target: port_postbuild -# Generate any files that are needed after actual build end. -# E.g. 
change format to srec, bin, zip in order to be able to load into flash -.PHONY: port_postbuild -port_postbuild: - -# Target: port_postrun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_postrun -port_postrun: - -# Target: port_prerun -# Do platform specific after run stuff. -# E.g. reset the board, backup the logfiles etc. -.PHONY: port_prerun -port_prerun: - -# Target: port_postload -# Do platform specific after load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_postload -port_postload: - -# Target: port_preload -# Do platform specific before load stuff. -# E.g. reset the reset power to the flash eraser -.PHONY: port_preload -port_preload: - -# FLAG: OPATH -# Path to the output folder. Default - current folder. -OPATH = ./ -MKDIR = mkdir -p - -# FLAG: PERL -# Define perl executable to calculate the geomean if running separate. -PERL=/usr/bin/perl diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/crt.S b/benchmarks/riscv-coremark/riscv64-baremetal/crt.S deleted file mode 100644 index d75e81e06..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/crt.S +++ /dev/null @@ -1,237 +0,0 @@ -# See LICENSE for license details. 
- -#include "encoding.h" - -#if __riscv_xlen == 64 -# define LREG ld -# define SREG sd -# define REGBYTES 8 -#else -# define LREG lw -# define SREG sw -# define REGBYTES 4 -#endif - - .section ".text.init" - .globl _start -_start: - li x1, 0 - li x2, 0 - li x3, 0 - li x4, 0 - li x5, 0 - li x6, 0 - li x7, 0 - li x8, 0 - li x9, 0 - li x10,0 - li x11,0 - li x12,0 - li x13,0 - li x14,0 - li x15,0 - li x16,0 - li x17,0 - li x18,0 - li x19,0 - li x20,0 - li x21,0 - li x22,0 - li x23,0 - li x24,0 - li x25,0 - li x26,0 - li x27,0 - li x28,0 - li x29,0 - li x30,0 - li x31,0 - - # enable FPU and accelerator if present - li t0, MSTATUS_FS | MSTATUS_XS - csrs mstatus, t0 - - # make sure XLEN agrees with compilation choice - li t0, 1 - slli t0, t0, 31 -#if __riscv_xlen == 64 - bgez t0, 1f -#else - bltz t0, 1f -#endif -2: - li a0, 1 - sw a0, tohost, t0 - j 2b -1: - -#ifdef __riscv_flen - # initialize FPU if we have one - la t0, 1f - csrw mtvec, t0 - - fssr x0 - fmv.s.x f0, x0 - fmv.s.x f1, x0 - fmv.s.x f2, x0 - fmv.s.x f3, x0 - fmv.s.x f4, x0 - fmv.s.x f5, x0 - fmv.s.x f6, x0 - fmv.s.x f7, x0 - fmv.s.x f8, x0 - fmv.s.x f9, x0 - fmv.s.x f10,x0 - fmv.s.x f11,x0 - fmv.s.x f12,x0 - fmv.s.x f13,x0 - fmv.s.x f14,x0 - fmv.s.x f15,x0 - fmv.s.x f16,x0 - fmv.s.x f17,x0 - fmv.s.x f18,x0 - fmv.s.x f19,x0 - fmv.s.x f20,x0 - fmv.s.x f21,x0 - fmv.s.x f22,x0 - fmv.s.x f23,x0 - fmv.s.x f24,x0 - fmv.s.x f25,x0 - fmv.s.x f26,x0 - fmv.s.x f27,x0 - fmv.s.x f28,x0 - fmv.s.x f29,x0 - fmv.s.x f30,x0 - fmv.s.x f31,x0 -1: -#endif - - # initialize trap vector - la t0, trap_entry - csrw mtvec, t0 - - # initialize global pointer -.option push -.option norelax - la gp, __global_pointer$ -.option pop - - la tp, _end + 63 - and tp, tp, -64 - - # get core id - csrr a0, mhartid - # for now, assume only 1 core - li a1, 1 -1:bgeu a0, a1, 1b - - # give each core 128KB of stack + TLS -#define STKSHIFT 17 - sll a2, a0, STKSHIFT - add tp, tp, a2 - add sp, a0, 1 - sll sp, sp, STKSHIFT - add sp, sp, tp - - j _init - - 
.align 2 -trap_entry: - addi sp, sp, -272 - - SREG x1, 1*REGBYTES(sp) - SREG x2, 2*REGBYTES(sp) - SREG x3, 3*REGBYTES(sp) - SREG x4, 4*REGBYTES(sp) - SREG x5, 5*REGBYTES(sp) - SREG x6, 6*REGBYTES(sp) - SREG x7, 7*REGBYTES(sp) - SREG x8, 8*REGBYTES(sp) - SREG x9, 9*REGBYTES(sp) - SREG x10, 10*REGBYTES(sp) - SREG x11, 11*REGBYTES(sp) - SREG x12, 12*REGBYTES(sp) - SREG x13, 13*REGBYTES(sp) - SREG x14, 14*REGBYTES(sp) - SREG x15, 15*REGBYTES(sp) - SREG x16, 16*REGBYTES(sp) - SREG x17, 17*REGBYTES(sp) - SREG x18, 18*REGBYTES(sp) - SREG x19, 19*REGBYTES(sp) - SREG x20, 20*REGBYTES(sp) - SREG x21, 21*REGBYTES(sp) - SREG x22, 22*REGBYTES(sp) - SREG x23, 23*REGBYTES(sp) - SREG x24, 24*REGBYTES(sp) - SREG x25, 25*REGBYTES(sp) - SREG x26, 26*REGBYTES(sp) - SREG x27, 27*REGBYTES(sp) - SREG x28, 28*REGBYTES(sp) - SREG x29, 29*REGBYTES(sp) - SREG x30, 30*REGBYTES(sp) - SREG x31, 31*REGBYTES(sp) - - csrr a0, mcause - csrr a1, mepc - mv a2, sp - jal handle_trap - csrw mepc, a0 - - # Remain in M-mode after eret - li t0, MSTATUS_MPP - csrs mstatus, t0 - - LREG x1, 1*REGBYTES(sp) - LREG x2, 2*REGBYTES(sp) - LREG x3, 3*REGBYTES(sp) - LREG x4, 4*REGBYTES(sp) - LREG x5, 5*REGBYTES(sp) - LREG x6, 6*REGBYTES(sp) - LREG x7, 7*REGBYTES(sp) - LREG x8, 8*REGBYTES(sp) - LREG x9, 9*REGBYTES(sp) - LREG x10, 10*REGBYTES(sp) - LREG x11, 11*REGBYTES(sp) - LREG x12, 12*REGBYTES(sp) - LREG x13, 13*REGBYTES(sp) - LREG x14, 14*REGBYTES(sp) - LREG x15, 15*REGBYTES(sp) - LREG x16, 16*REGBYTES(sp) - LREG x17, 17*REGBYTES(sp) - LREG x18, 18*REGBYTES(sp) - LREG x19, 19*REGBYTES(sp) - LREG x20, 20*REGBYTES(sp) - LREG x21, 21*REGBYTES(sp) - LREG x22, 22*REGBYTES(sp) - LREG x23, 23*REGBYTES(sp) - LREG x24, 24*REGBYTES(sp) - LREG x25, 25*REGBYTES(sp) - LREG x26, 26*REGBYTES(sp) - LREG x27, 27*REGBYTES(sp) - LREG x28, 28*REGBYTES(sp) - LREG x29, 29*REGBYTES(sp) - LREG x30, 30*REGBYTES(sp) - LREG x31, 31*REGBYTES(sp) - - addi sp, sp, 272 - mret - -.section ".tdata.begin" -.globl _tdata_begin -_tdata_begin: - 
-.section ".tdata.end" -.globl _tdata_end -_tdata_end: - -.section ".tbss.end" -.globl _tbss_end -_tbss_end: - -.section ".tohost","aw",@progbits -.align 6 -.globl tohost -tohost: .dword 0 -.align 6 -.globl fromhost -fromhost: .dword 0 diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/encoding.h b/benchmarks/riscv-coremark/riscv64-baremetal/encoding.h deleted file mode 100644 index c109ce189..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/encoding.h +++ /dev/null @@ -1,1471 +0,0 @@ -// See LICENSE for license details. - -#ifndef RISCV_CSR_ENCODING_H -#define RISCV_CSR_ENCODING_H - -#define MSTATUS_UIE 0x00000001 -#define MSTATUS_SIE 0x00000002 -#define MSTATUS_HIE 0x00000004 -#define MSTATUS_MIE 0x00000008 -#define MSTATUS_UPIE 0x00000010 -#define MSTATUS_SPIE 0x00000020 -#define MSTATUS_HPIE 0x00000040 -#define MSTATUS_MPIE 0x00000080 -#define MSTATUS_SPP 0x00000100 -#define MSTATUS_HPP 0x00000600 -#define MSTATUS_MPP 0x00001800 -#define MSTATUS_FS 0x00006000 -#define MSTATUS_XS 0x00018000 -#define MSTATUS_MPRV 0x00020000 -#define MSTATUS_SUM 0x00040000 -#define MSTATUS_MXR 0x00080000 -#define MSTATUS_TVM 0x00100000 -#define MSTATUS_TW 0x00200000 -#define MSTATUS_TSR 0x00400000 -#define MSTATUS32_SD 0x80000000 -#define MSTATUS_UXL 0x0000000300000000 -#define MSTATUS_SXL 0x0000000C00000000 -#define MSTATUS64_SD 0x8000000000000000 - -#define SSTATUS_UIE 0x00000001 -#define SSTATUS_SIE 0x00000002 -#define SSTATUS_UPIE 0x00000010 -#define SSTATUS_SPIE 0x00000020 -#define SSTATUS_SPP 0x00000100 -#define SSTATUS_FS 0x00006000 -#define SSTATUS_XS 0x00018000 -#define SSTATUS_SUM 0x00040000 -#define SSTATUS_MXR 0x00080000 -#define SSTATUS32_SD 0x80000000 -#define SSTATUS_UXL 0x0000000300000000 -#define SSTATUS64_SD 0x8000000000000000 - -#define DCSR_XDEBUGVER (3U<<30) -#define DCSR_NDRESET (1<<29) -#define DCSR_FULLRESET (1<<28) -#define DCSR_EBREAKM (1<<15) -#define DCSR_EBREAKH (1<<14) -#define DCSR_EBREAKS (1<<13) -#define DCSR_EBREAKU (1<<12) 
-#define DCSR_STOPCYCLE (1<<10) -#define DCSR_STOPTIME (1<<9) -#define DCSR_CAUSE (7<<6) -#define DCSR_DEBUGINT (1<<5) -#define DCSR_HALT (1<<3) -#define DCSR_STEP (1<<2) -#define DCSR_PRV (3<<0) - -#define DCSR_CAUSE_NONE 0 -#define DCSR_CAUSE_SWBP 1 -#define DCSR_CAUSE_HWBP 2 -#define DCSR_CAUSE_DEBUGINT 3 -#define DCSR_CAUSE_STEP 4 -#define DCSR_CAUSE_HALT 5 - -#define MCONTROL_TYPE(xlen) (0xfULL<<((xlen)-4)) -#define MCONTROL_DMODE(xlen) (1ULL<<((xlen)-5)) -#define MCONTROL_MASKMAX(xlen) (0x3fULL<<((xlen)-11)) - -#define MCONTROL_SELECT (1<<19) -#define MCONTROL_TIMING (1<<18) -#define MCONTROL_ACTION (0x3f<<12) -#define MCONTROL_CHAIN (1<<11) -#define MCONTROL_MATCH (0xf<<7) -#define MCONTROL_M (1<<6) -#define MCONTROL_H (1<<5) -#define MCONTROL_S (1<<4) -#define MCONTROL_U (1<<3) -#define MCONTROL_EXECUTE (1<<2) -#define MCONTROL_STORE (1<<1) -#define MCONTROL_LOAD (1<<0) - -#define MCONTROL_TYPE_NONE 0 -#define MCONTROL_TYPE_MATCH 2 - -#define MCONTROL_ACTION_DEBUG_EXCEPTION 0 -#define MCONTROL_ACTION_DEBUG_MODE 1 -#define MCONTROL_ACTION_TRACE_START 2 -#define MCONTROL_ACTION_TRACE_STOP 3 -#define MCONTROL_ACTION_TRACE_EMIT 4 - -#define MCONTROL_MATCH_EQUAL 0 -#define MCONTROL_MATCH_NAPOT 1 -#define MCONTROL_MATCH_GE 2 -#define MCONTROL_MATCH_LT 3 -#define MCONTROL_MATCH_MASK_LOW 4 -#define MCONTROL_MATCH_MASK_HIGH 5 - -#define MIP_SSIP (1 << IRQ_S_SOFT) -#define MIP_HSIP (1 << IRQ_H_SOFT) -#define MIP_MSIP (1 << IRQ_M_SOFT) -#define MIP_STIP (1 << IRQ_S_TIMER) -#define MIP_HTIP (1 << IRQ_H_TIMER) -#define MIP_MTIP (1 << IRQ_M_TIMER) -#define MIP_SEIP (1 << IRQ_S_EXT) -#define MIP_HEIP (1 << IRQ_H_EXT) -#define MIP_MEIP (1 << IRQ_M_EXT) - -#define SIP_SSIP MIP_SSIP -#define SIP_STIP MIP_STIP - -#define PRV_U 0 -#define PRV_S 1 -#define PRV_H 2 -#define PRV_M 3 - -#define SATP32_MODE 0x80000000 -#define SATP32_ASID 0x7FC00000 -#define SATP32_PPN 0x003FFFFF -#define SATP64_MODE 0xF000000000000000 -#define SATP64_ASID 0x0FFFF00000000000 -#define SATP64_PPN 
0x00000FFFFFFFFFFF - -#define SATP_MODE_OFF 0 -#define SATP_MODE_SV32 1 -#define SATP_MODE_SV39 8 -#define SATP_MODE_SV48 9 -#define SATP_MODE_SV57 10 -#define SATP_MODE_SV64 11 - -#define PMP_R 0x01 -#define PMP_W 0x02 -#define PMP_X 0x04 -#define PMP_A 0x18 -#define PMP_L 0x80 -#define PMP_SHIFT 2 - -#define PMP_TOR 0x08 -#define PMP_NA4 0x10 -#define PMP_NAPOT 0x18 - -#define IRQ_S_SOFT 1 -#define IRQ_H_SOFT 2 -#define IRQ_M_SOFT 3 -#define IRQ_S_TIMER 5 -#define IRQ_H_TIMER 6 -#define IRQ_M_TIMER 7 -#define IRQ_S_EXT 9 -#define IRQ_H_EXT 10 -#define IRQ_M_EXT 11 -#define IRQ_COP 12 -#define IRQ_HOST 13 - -#define DEFAULT_RSTVEC 0x00001000 -#define CLINT_BASE 0x02000000 -#define CLINT_SIZE 0x000c0000 -#define EXT_IO_BASE 0x40000000 -#define DRAM_BASE 0x80000000 - -// page table entry (PTE) fields -#define PTE_V 0x001 // Valid -#define PTE_R 0x002 // Read -#define PTE_W 0x004 // Write -#define PTE_X 0x008 // Execute -#define PTE_U 0x010 // User -#define PTE_G 0x020 // Global -#define PTE_A 0x040 // Accessed -#define PTE_D 0x080 // Dirty -#define PTE_SOFT 0x300 // Reserved for Software - -#define PTE_PPN_SHIFT 10 - -#define PTE_TABLE(PTE) (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V) - -#ifdef __riscv - -#if __riscv_xlen == 64 -# define MSTATUS_SD MSTATUS64_SD -# define SSTATUS_SD SSTATUS64_SD -# define RISCV_PGLEVEL_BITS 9 -# define SATP_MODE SATP64_MODE -#else -# define MSTATUS_SD MSTATUS32_SD -# define SSTATUS_SD SSTATUS32_SD -# define RISCV_PGLEVEL_BITS 10 -# define SATP_MODE SATP32_MODE -#endif -#define RISCV_PGSHIFT 12 -#define RISCV_PGSIZE (1 << RISCV_PGSHIFT) - -#ifndef __ASSEMBLER__ - -#ifdef __GNUC__ - -#define read_csr(reg) ({ unsigned long __tmp; \ - asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ - __tmp; }) - -#define write_csr(reg, val) ({ \ - asm volatile ("csrw " #reg ", %0" :: "rK"(val)); }) - -#define swap_csr(reg, val) ({ unsigned long __tmp; \ - asm volatile ("csrrw %0, " #reg ", %1" : "=r"(__tmp) : "rK"(val)); \ - __tmp; }) - -#define 
set_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrs %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define clear_csr(reg, bit) ({ unsigned long __tmp; \ - asm volatile ("csrrc %0, " #reg ", %1" : "=r"(__tmp) : "rK"(bit)); \ - __tmp; }) - -#define rdtime() read_csr(time) -#define rdcycle() read_csr(cycle) -#define rdinstret() read_csr(instret) - -#endif - -#endif - -#endif - -#endif -/* Automatically generated by parse-opcodes. */ -#ifndef RISCV_ENCODING_H -#define RISCV_ENCODING_H -#define MATCH_BEQ 0x63 -#define MASK_BEQ 0x707f -#define MATCH_BNE 0x1063 -#define MASK_BNE 0x707f -#define MATCH_BLT 0x4063 -#define MASK_BLT 0x707f -#define MATCH_BGE 0x5063 -#define MASK_BGE 0x707f -#define MATCH_BLTU 0x6063 -#define MASK_BLTU 0x707f -#define MATCH_BGEU 0x7063 -#define MASK_BGEU 0x707f -#define MATCH_JALR 0x67 -#define MASK_JALR 0x707f -#define MATCH_JAL 0x6f -#define MASK_JAL 0x7f -#define MATCH_LUI 0x37 -#define MASK_LUI 0x7f -#define MATCH_AUIPC 0x17 -#define MASK_AUIPC 0x7f -#define MATCH_ADDI 0x13 -#define MASK_ADDI 0x707f -#define MATCH_SLLI 0x1013 -#define MASK_SLLI 0xfc00707f -#define MATCH_SLTI 0x2013 -#define MASK_SLTI 0x707f -#define MATCH_SLTIU 0x3013 -#define MASK_SLTIU 0x707f -#define MATCH_XORI 0x4013 -#define MASK_XORI 0x707f -#define MATCH_SRLI 0x5013 -#define MASK_SRLI 0xfc00707f -#define MATCH_SRAI 0x40005013 -#define MASK_SRAI 0xfc00707f -#define MATCH_ORI 0x6013 -#define MASK_ORI 0x707f -#define MATCH_ANDI 0x7013 -#define MASK_ANDI 0x707f -#define MATCH_ADD 0x33 -#define MASK_ADD 0xfe00707f -#define MATCH_SUB 0x40000033 -#define MASK_SUB 0xfe00707f -#define MATCH_SLL 0x1033 -#define MASK_SLL 0xfe00707f -#define MATCH_SLT 0x2033 -#define MASK_SLT 0xfe00707f -#define MATCH_SLTU 0x3033 -#define MASK_SLTU 0xfe00707f -#define MATCH_XOR 0x4033 -#define MASK_XOR 0xfe00707f -#define MATCH_SRL 0x5033 -#define MASK_SRL 0xfe00707f -#define MATCH_SRA 0x40005033 -#define MASK_SRA 0xfe00707f -#define MATCH_OR 0x6033 -#define 
MASK_OR 0xfe00707f -#define MATCH_AND 0x7033 -#define MASK_AND 0xfe00707f -#define MATCH_ADDIW 0x1b -#define MASK_ADDIW 0x707f -#define MATCH_SLLIW 0x101b -#define MASK_SLLIW 0xfe00707f -#define MATCH_SRLIW 0x501b -#define MASK_SRLIW 0xfe00707f -#define MATCH_SRAIW 0x4000501b -#define MASK_SRAIW 0xfe00707f -#define MATCH_ADDW 0x3b -#define MASK_ADDW 0xfe00707f -#define MATCH_SUBW 0x4000003b -#define MASK_SUBW 0xfe00707f -#define MATCH_SLLW 0x103b -#define MASK_SLLW 0xfe00707f -#define MATCH_SRLW 0x503b -#define MASK_SRLW 0xfe00707f -#define MATCH_SRAW 0x4000503b -#define MASK_SRAW 0xfe00707f -#define MATCH_LB 0x3 -#define MASK_LB 0x707f -#define MATCH_LH 0x1003 -#define MASK_LH 0x707f -#define MATCH_LW 0x2003 -#define MASK_LW 0x707f -#define MATCH_LD 0x3003 -#define MASK_LD 0x707f -#define MATCH_LBU 0x4003 -#define MASK_LBU 0x707f -#define MATCH_LHU 0x5003 -#define MASK_LHU 0x707f -#define MATCH_LWU 0x6003 -#define MASK_LWU 0x707f -#define MATCH_SB 0x23 -#define MASK_SB 0x707f -#define MATCH_SH 0x1023 -#define MASK_SH 0x707f -#define MATCH_SW 0x2023 -#define MASK_SW 0x707f -#define MATCH_SD 0x3023 -#define MASK_SD 0x707f -#define MATCH_FENCE 0xf -#define MASK_FENCE 0x707f -#define MATCH_FENCE_I 0x100f -#define MASK_FENCE_I 0x707f -#define MATCH_MUL 0x2000033 -#define MASK_MUL 0xfe00707f -#define MATCH_MULH 0x2001033 -#define MASK_MULH 0xfe00707f -#define MATCH_MULHSU 0x2002033 -#define MASK_MULHSU 0xfe00707f -#define MATCH_MULHU 0x2003033 -#define MASK_MULHU 0xfe00707f -#define MATCH_DIV 0x2004033 -#define MASK_DIV 0xfe00707f -#define MATCH_DIVU 0x2005033 -#define MASK_DIVU 0xfe00707f -#define MATCH_REM 0x2006033 -#define MASK_REM 0xfe00707f -#define MATCH_REMU 0x2007033 -#define MASK_REMU 0xfe00707f -#define MATCH_MULW 0x200003b -#define MASK_MULW 0xfe00707f -#define MATCH_DIVW 0x200403b -#define MASK_DIVW 0xfe00707f -#define MATCH_DIVUW 0x200503b -#define MASK_DIVUW 0xfe00707f -#define MATCH_REMW 0x200603b -#define MASK_REMW 0xfe00707f -#define MATCH_REMUW 
0x200703b -#define MASK_REMUW 0xfe00707f -#define MATCH_AMOADD_W 0x202f -#define MASK_AMOADD_W 0xf800707f -#define MATCH_AMOXOR_W 0x2000202f -#define MASK_AMOXOR_W 0xf800707f -#define MATCH_AMOOR_W 0x4000202f -#define MASK_AMOOR_W 0xf800707f -#define MATCH_AMOAND_W 0x6000202f -#define MASK_AMOAND_W 0xf800707f -#define MATCH_AMOMIN_W 0x8000202f -#define MASK_AMOMIN_W 0xf800707f -#define MATCH_AMOMAX_W 0xa000202f -#define MASK_AMOMAX_W 0xf800707f -#define MATCH_AMOMINU_W 0xc000202f -#define MASK_AMOMINU_W 0xf800707f -#define MATCH_AMOMAXU_W 0xe000202f -#define MASK_AMOMAXU_W 0xf800707f -#define MATCH_AMOSWAP_W 0x800202f -#define MASK_AMOSWAP_W 0xf800707f -#define MATCH_LR_W 0x1000202f -#define MASK_LR_W 0xf9f0707f -#define MATCH_SC_W 0x1800202f -#define MASK_SC_W 0xf800707f -#define MATCH_AMOADD_D 0x302f -#define MASK_AMOADD_D 0xf800707f -#define MATCH_AMOXOR_D 0x2000302f -#define MASK_AMOXOR_D 0xf800707f -#define MATCH_AMOOR_D 0x4000302f -#define MASK_AMOOR_D 0xf800707f -#define MATCH_AMOAND_D 0x6000302f -#define MASK_AMOAND_D 0xf800707f -#define MATCH_AMOMIN_D 0x8000302f -#define MASK_AMOMIN_D 0xf800707f -#define MATCH_AMOMAX_D 0xa000302f -#define MASK_AMOMAX_D 0xf800707f -#define MATCH_AMOMINU_D 0xc000302f -#define MASK_AMOMINU_D 0xf800707f -#define MATCH_AMOMAXU_D 0xe000302f -#define MASK_AMOMAXU_D 0xf800707f -#define MATCH_AMOSWAP_D 0x800302f -#define MASK_AMOSWAP_D 0xf800707f -#define MATCH_LR_D 0x1000302f -#define MASK_LR_D 0xf9f0707f -#define MATCH_SC_D 0x1800302f -#define MASK_SC_D 0xf800707f -#define MATCH_ECALL 0x73 -#define MASK_ECALL 0xffffffff -#define MATCH_EBREAK 0x100073 -#define MASK_EBREAK 0xffffffff -#define MATCH_URET 0x200073 -#define MASK_URET 0xffffffff -#define MATCH_SRET 0x10200073 -#define MASK_SRET 0xffffffff -#define MATCH_MRET 0x30200073 -#define MASK_MRET 0xffffffff -#define MATCH_DRET 0x7b200073 -#define MASK_DRET 0xffffffff -#define MATCH_SFENCE_VMA 0x12000073 -#define MASK_SFENCE_VMA 0xfe007fff -#define MATCH_WFI 0x10500073 -#define 
MASK_WFI 0xffffffff -#define MATCH_CSRRW 0x1073 -#define MASK_CSRRW 0x707f -#define MATCH_CSRRS 0x2073 -#define MASK_CSRRS 0x707f -#define MATCH_CSRRC 0x3073 -#define MASK_CSRRC 0x707f -#define MATCH_CSRRWI 0x5073 -#define MASK_CSRRWI 0x707f -#define MATCH_CSRRSI 0x6073 -#define MASK_CSRRSI 0x707f -#define MATCH_CSRRCI 0x7073 -#define MASK_CSRRCI 0x707f -#define MATCH_FADD_S 0x53 -#define MASK_FADD_S 0xfe00007f -#define MATCH_FSUB_S 0x8000053 -#define MASK_FSUB_S 0xfe00007f -#define MATCH_FMUL_S 0x10000053 -#define MASK_FMUL_S 0xfe00007f -#define MATCH_FDIV_S 0x18000053 -#define MASK_FDIV_S 0xfe00007f -#define MATCH_FSGNJ_S 0x20000053 -#define MASK_FSGNJ_S 0xfe00707f -#define MATCH_FSGNJN_S 0x20001053 -#define MASK_FSGNJN_S 0xfe00707f -#define MATCH_FSGNJX_S 0x20002053 -#define MASK_FSGNJX_S 0xfe00707f -#define MATCH_FMIN_S 0x28000053 -#define MASK_FMIN_S 0xfe00707f -#define MATCH_FMAX_S 0x28001053 -#define MASK_FMAX_S 0xfe00707f -#define MATCH_FSQRT_S 0x58000053 -#define MASK_FSQRT_S 0xfff0007f -#define MATCH_FADD_D 0x2000053 -#define MASK_FADD_D 0xfe00007f -#define MATCH_FSUB_D 0xa000053 -#define MASK_FSUB_D 0xfe00007f -#define MATCH_FMUL_D 0x12000053 -#define MASK_FMUL_D 0xfe00007f -#define MATCH_FDIV_D 0x1a000053 -#define MASK_FDIV_D 0xfe00007f -#define MATCH_FSGNJ_D 0x22000053 -#define MASK_FSGNJ_D 0xfe00707f -#define MATCH_FSGNJN_D 0x22001053 -#define MASK_FSGNJN_D 0xfe00707f -#define MATCH_FSGNJX_D 0x22002053 -#define MASK_FSGNJX_D 0xfe00707f -#define MATCH_FMIN_D 0x2a000053 -#define MASK_FMIN_D 0xfe00707f -#define MATCH_FMAX_D 0x2a001053 -#define MASK_FMAX_D 0xfe00707f -#define MATCH_FCVT_S_D 0x40100053 -#define MASK_FCVT_S_D 0xfff0007f -#define MATCH_FCVT_D_S 0x42000053 -#define MASK_FCVT_D_S 0xfff0007f -#define MATCH_FSQRT_D 0x5a000053 -#define MASK_FSQRT_D 0xfff0007f -#define MATCH_FADD_Q 0x6000053 -#define MASK_FADD_Q 0xfe00007f -#define MATCH_FSUB_Q 0xe000053 -#define MASK_FSUB_Q 0xfe00007f -#define MATCH_FMUL_Q 0x16000053 -#define MASK_FMUL_Q 
0xfe00007f -#define MATCH_FDIV_Q 0x1e000053 -#define MASK_FDIV_Q 0xfe00007f -#define MATCH_FSGNJ_Q 0x26000053 -#define MASK_FSGNJ_Q 0xfe00707f -#define MATCH_FSGNJN_Q 0x26001053 -#define MASK_FSGNJN_Q 0xfe00707f -#define MATCH_FSGNJX_Q 0x26002053 -#define MASK_FSGNJX_Q 0xfe00707f -#define MATCH_FMIN_Q 0x2e000053 -#define MASK_FMIN_Q 0xfe00707f -#define MATCH_FMAX_Q 0x2e001053 -#define MASK_FMAX_Q 0xfe00707f -#define MATCH_FCVT_S_Q 0x40300053 -#define MASK_FCVT_S_Q 0xfff0007f -#define MATCH_FCVT_Q_S 0x46000053 -#define MASK_FCVT_Q_S 0xfff0007f -#define MATCH_FCVT_D_Q 0x42300053 -#define MASK_FCVT_D_Q 0xfff0007f -#define MATCH_FCVT_Q_D 0x46100053 -#define MASK_FCVT_Q_D 0xfff0007f -#define MATCH_FSQRT_Q 0x5e000053 -#define MASK_FSQRT_Q 0xfff0007f -#define MATCH_FLE_S 0xa0000053 -#define MASK_FLE_S 0xfe00707f -#define MATCH_FLT_S 0xa0001053 -#define MASK_FLT_S 0xfe00707f -#define MATCH_FEQ_S 0xa0002053 -#define MASK_FEQ_S 0xfe00707f -#define MATCH_FLE_D 0xa2000053 -#define MASK_FLE_D 0xfe00707f -#define MATCH_FLT_D 0xa2001053 -#define MASK_FLT_D 0xfe00707f -#define MATCH_FEQ_D 0xa2002053 -#define MASK_FEQ_D 0xfe00707f -#define MATCH_FLE_Q 0xa6000053 -#define MASK_FLE_Q 0xfe00707f -#define MATCH_FLT_Q 0xa6001053 -#define MASK_FLT_Q 0xfe00707f -#define MATCH_FEQ_Q 0xa6002053 -#define MASK_FEQ_Q 0xfe00707f -#define MATCH_FCVT_W_S 0xc0000053 -#define MASK_FCVT_W_S 0xfff0007f -#define MATCH_FCVT_WU_S 0xc0100053 -#define MASK_FCVT_WU_S 0xfff0007f -#define MATCH_FCVT_L_S 0xc0200053 -#define MASK_FCVT_L_S 0xfff0007f -#define MATCH_FCVT_LU_S 0xc0300053 -#define MASK_FCVT_LU_S 0xfff0007f -#define MATCH_FMV_X_W 0xe0000053 -#define MASK_FMV_X_W 0xfff0707f -#define MATCH_FCLASS_S 0xe0001053 -#define MASK_FCLASS_S 0xfff0707f -#define MATCH_FCVT_W_D 0xc2000053 -#define MASK_FCVT_W_D 0xfff0007f -#define MATCH_FCVT_WU_D 0xc2100053 -#define MASK_FCVT_WU_D 0xfff0007f -#define MATCH_FCVT_L_D 0xc2200053 -#define MASK_FCVT_L_D 0xfff0007f -#define MATCH_FCVT_LU_D 0xc2300053 -#define 
MASK_FCVT_LU_D 0xfff0007f -#define MATCH_FMV_X_D 0xe2000053 -#define MASK_FMV_X_D 0xfff0707f -#define MATCH_FCLASS_D 0xe2001053 -#define MASK_FCLASS_D 0xfff0707f -#define MATCH_FCVT_W_Q 0xc6000053 -#define MASK_FCVT_W_Q 0xfff0007f -#define MATCH_FCVT_WU_Q 0xc6100053 -#define MASK_FCVT_WU_Q 0xfff0007f -#define MATCH_FCVT_L_Q 0xc6200053 -#define MASK_FCVT_L_Q 0xfff0007f -#define MATCH_FCVT_LU_Q 0xc6300053 -#define MASK_FCVT_LU_Q 0xfff0007f -#define MATCH_FMV_X_Q 0xe6000053 -#define MASK_FMV_X_Q 0xfff0707f -#define MATCH_FCLASS_Q 0xe6001053 -#define MASK_FCLASS_Q 0xfff0707f -#define MATCH_FCVT_S_W 0xd0000053 -#define MASK_FCVT_S_W 0xfff0007f -#define MATCH_FCVT_S_WU 0xd0100053 -#define MASK_FCVT_S_WU 0xfff0007f -#define MATCH_FCVT_S_L 0xd0200053 -#define MASK_FCVT_S_L 0xfff0007f -#define MATCH_FCVT_S_LU 0xd0300053 -#define MASK_FCVT_S_LU 0xfff0007f -#define MATCH_FMV_W_X 0xf0000053 -#define MASK_FMV_W_X 0xfff0707f -#define MATCH_FCVT_D_W 0xd2000053 -#define MASK_FCVT_D_W 0xfff0007f -#define MATCH_FCVT_D_WU 0xd2100053 -#define MASK_FCVT_D_WU 0xfff0007f -#define MATCH_FCVT_D_L 0xd2200053 -#define MASK_FCVT_D_L 0xfff0007f -#define MATCH_FCVT_D_LU 0xd2300053 -#define MASK_FCVT_D_LU 0xfff0007f -#define MATCH_FMV_D_X 0xf2000053 -#define MASK_FMV_D_X 0xfff0707f -#define MATCH_FCVT_Q_W 0xd6000053 -#define MASK_FCVT_Q_W 0xfff0007f -#define MATCH_FCVT_Q_WU 0xd6100053 -#define MASK_FCVT_Q_WU 0xfff0007f -#define MATCH_FCVT_Q_L 0xd6200053 -#define MASK_FCVT_Q_L 0xfff0007f -#define MATCH_FCVT_Q_LU 0xd6300053 -#define MASK_FCVT_Q_LU 0xfff0007f -#define MATCH_FMV_Q_X 0xf6000053 -#define MASK_FMV_Q_X 0xfff0707f -#define MATCH_FLW 0x2007 -#define MASK_FLW 0x707f -#define MATCH_FLD 0x3007 -#define MASK_FLD 0x707f -#define MATCH_FLQ 0x4007 -#define MASK_FLQ 0x707f -#define MATCH_FSW 0x2027 -#define MASK_FSW 0x707f -#define MATCH_FSD 0x3027 -#define MASK_FSD 0x707f -#define MATCH_FSQ 0x4027 -#define MASK_FSQ 0x707f -#define MATCH_FMADD_S 0x43 -#define MASK_FMADD_S 0x600007f -#define 
MATCH_FMSUB_S 0x47 -#define MASK_FMSUB_S 0x600007f -#define MATCH_FNMSUB_S 0x4b -#define MASK_FNMSUB_S 0x600007f -#define MATCH_FNMADD_S 0x4f -#define MASK_FNMADD_S 0x600007f -#define MATCH_FMADD_D 0x2000043 -#define MASK_FMADD_D 0x600007f -#define MATCH_FMSUB_D 0x2000047 -#define MASK_FMSUB_D 0x600007f -#define MATCH_FNMSUB_D 0x200004b -#define MASK_FNMSUB_D 0x600007f -#define MATCH_FNMADD_D 0x200004f -#define MASK_FNMADD_D 0x600007f -#define MATCH_FMADD_Q 0x6000043 -#define MASK_FMADD_Q 0x600007f -#define MATCH_FMSUB_Q 0x6000047 -#define MASK_FMSUB_Q 0x600007f -#define MATCH_FNMSUB_Q 0x600004b -#define MASK_FNMSUB_Q 0x600007f -#define MATCH_FNMADD_Q 0x600004f -#define MASK_FNMADD_Q 0x600007f -#define MATCH_C_NOP 0x1 -#define MASK_C_NOP 0xffff -#define MATCH_C_ADDI16SP 0x6101 -#define MASK_C_ADDI16SP 0xef83 -#define MATCH_C_JR 0x8002 -#define MASK_C_JR 0xf07f -#define MATCH_C_JALR 0x9002 -#define MASK_C_JALR 0xf07f -#define MATCH_C_EBREAK 0x9002 -#define MASK_C_EBREAK 0xffff -#define MATCH_C_LD 0x6000 -#define MASK_C_LD 0xe003 -#define MATCH_C_SD 0xe000 -#define MASK_C_SD 0xe003 -#define MATCH_C_ADDIW 0x2001 -#define MASK_C_ADDIW 0xe003 -#define MATCH_C_LDSP 0x6002 -#define MASK_C_LDSP 0xe003 -#define MATCH_C_SDSP 0xe002 -#define MASK_C_SDSP 0xe003 -#define MATCH_C_ADDI4SPN 0x0 -#define MASK_C_ADDI4SPN 0xe003 -#define MATCH_C_FLD 0x2000 -#define MASK_C_FLD 0xe003 -#define MATCH_C_LW 0x4000 -#define MASK_C_LW 0xe003 -#define MATCH_C_FLW 0x6000 -#define MASK_C_FLW 0xe003 -#define MATCH_C_FSD 0xa000 -#define MASK_C_FSD 0xe003 -#define MATCH_C_SW 0xc000 -#define MASK_C_SW 0xe003 -#define MATCH_C_FSW 0xe000 -#define MASK_C_FSW 0xe003 -#define MATCH_C_ADDI 0x1 -#define MASK_C_ADDI 0xe003 -#define MATCH_C_JAL 0x2001 -#define MASK_C_JAL 0xe003 -#define MATCH_C_LI 0x4001 -#define MASK_C_LI 0xe003 -#define MATCH_C_LUI 0x6001 -#define MASK_C_LUI 0xe003 -#define MATCH_C_SRLI 0x8001 -#define MASK_C_SRLI 0xec03 -#define MATCH_C_SRAI 0x8401 -#define MASK_C_SRAI 0xec03 -#define 
MATCH_C_ANDI 0x8801 -#define MASK_C_ANDI 0xec03 -#define MATCH_C_SUB 0x8c01 -#define MASK_C_SUB 0xfc63 -#define MATCH_C_XOR 0x8c21 -#define MASK_C_XOR 0xfc63 -#define MATCH_C_OR 0x8c41 -#define MASK_C_OR 0xfc63 -#define MATCH_C_AND 0x8c61 -#define MASK_C_AND 0xfc63 -#define MATCH_C_SUBW 0x9c01 -#define MASK_C_SUBW 0xfc63 -#define MATCH_C_ADDW 0x9c21 -#define MASK_C_ADDW 0xfc63 -#define MATCH_C_J 0xa001 -#define MASK_C_J 0xe003 -#define MATCH_C_BEQZ 0xc001 -#define MASK_C_BEQZ 0xe003 -#define MATCH_C_BNEZ 0xe001 -#define MASK_C_BNEZ 0xe003 -#define MATCH_C_SLLI 0x2 -#define MASK_C_SLLI 0xe003 -#define MATCH_C_FLDSP 0x2002 -#define MASK_C_FLDSP 0xe003 -#define MATCH_C_LWSP 0x4002 -#define MASK_C_LWSP 0xe003 -#define MATCH_C_FLWSP 0x6002 -#define MASK_C_FLWSP 0xe003 -#define MATCH_C_MV 0x8002 -#define MASK_C_MV 0xf003 -#define MATCH_C_ADD 0x9002 -#define MASK_C_ADD 0xf003 -#define MATCH_C_FSDSP 0xa002 -#define MASK_C_FSDSP 0xe003 -#define MATCH_C_SWSP 0xc002 -#define MASK_C_SWSP 0xe003 -#define MATCH_C_FSWSP 0xe002 -#define MASK_C_FSWSP 0xe003 -#define MATCH_CUSTOM0 0xb -#define MASK_CUSTOM0 0x707f -#define MATCH_CUSTOM0_RS1 0x200b -#define MASK_CUSTOM0_RS1 0x707f -#define MATCH_CUSTOM0_RS1_RS2 0x300b -#define MASK_CUSTOM0_RS1_RS2 0x707f -#define MATCH_CUSTOM0_RD 0x400b -#define MASK_CUSTOM0_RD 0x707f -#define MATCH_CUSTOM0_RD_RS1 0x600b -#define MASK_CUSTOM0_RD_RS1 0x707f -#define MATCH_CUSTOM0_RD_RS1_RS2 0x700b -#define MASK_CUSTOM0_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM1 0x2b -#define MASK_CUSTOM1 0x707f -#define MATCH_CUSTOM1_RS1 0x202b -#define MASK_CUSTOM1_RS1 0x707f -#define MATCH_CUSTOM1_RS1_RS2 0x302b -#define MASK_CUSTOM1_RS1_RS2 0x707f -#define MATCH_CUSTOM1_RD 0x402b -#define MASK_CUSTOM1_RD 0x707f -#define MATCH_CUSTOM1_RD_RS1 0x602b -#define MASK_CUSTOM1_RD_RS1 0x707f -#define MATCH_CUSTOM1_RD_RS1_RS2 0x702b -#define MASK_CUSTOM1_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM2 0x5b -#define MASK_CUSTOM2 0x707f -#define MATCH_CUSTOM2_RS1 0x205b -#define 
MASK_CUSTOM2_RS1 0x707f -#define MATCH_CUSTOM2_RS1_RS2 0x305b -#define MASK_CUSTOM2_RS1_RS2 0x707f -#define MATCH_CUSTOM2_RD 0x405b -#define MASK_CUSTOM2_RD 0x707f -#define MATCH_CUSTOM2_RD_RS1 0x605b -#define MASK_CUSTOM2_RD_RS1 0x707f -#define MATCH_CUSTOM2_RD_RS1_RS2 0x705b -#define MASK_CUSTOM2_RD_RS1_RS2 0x707f -#define MATCH_CUSTOM3 0x7b -#define MASK_CUSTOM3 0x707f -#define MATCH_CUSTOM3_RS1 0x207b -#define MASK_CUSTOM3_RS1 0x707f -#define MATCH_CUSTOM3_RS1_RS2 0x307b -#define MASK_CUSTOM3_RS1_RS2 0x707f -#define MATCH_CUSTOM3_RD 0x407b -#define MASK_CUSTOM3_RD 0x707f -#define MATCH_CUSTOM3_RD_RS1 0x607b -#define MASK_CUSTOM3_RD_RS1 0x707f -#define MATCH_CUSTOM3_RD_RS1_RS2 0x707b -#define MASK_CUSTOM3_RD_RS1_RS2 0x707f -#define CSR_FFLAGS 0x1 -#define CSR_FRM 0x2 -#define CSR_FCSR 0x3 -#define CSR_CYCLE 0xc00 -#define CSR_TIME 0xc01 -#define CSR_INSTRET 0xc02 -#define CSR_HPMCOUNTER3 0xc03 -#define CSR_HPMCOUNTER4 0xc04 -#define CSR_HPMCOUNTER5 0xc05 -#define CSR_HPMCOUNTER6 0xc06 -#define CSR_HPMCOUNTER7 0xc07 -#define CSR_HPMCOUNTER8 0xc08 -#define CSR_HPMCOUNTER9 0xc09 -#define CSR_HPMCOUNTER10 0xc0a -#define CSR_HPMCOUNTER11 0xc0b -#define CSR_HPMCOUNTER12 0xc0c -#define CSR_HPMCOUNTER13 0xc0d -#define CSR_HPMCOUNTER14 0xc0e -#define CSR_HPMCOUNTER15 0xc0f -#define CSR_HPMCOUNTER16 0xc10 -#define CSR_HPMCOUNTER17 0xc11 -#define CSR_HPMCOUNTER18 0xc12 -#define CSR_HPMCOUNTER19 0xc13 -#define CSR_HPMCOUNTER20 0xc14 -#define CSR_HPMCOUNTER21 0xc15 -#define CSR_HPMCOUNTER22 0xc16 -#define CSR_HPMCOUNTER23 0xc17 -#define CSR_HPMCOUNTER24 0xc18 -#define CSR_HPMCOUNTER25 0xc19 -#define CSR_HPMCOUNTER26 0xc1a -#define CSR_HPMCOUNTER27 0xc1b -#define CSR_HPMCOUNTER28 0xc1c -#define CSR_HPMCOUNTER29 0xc1d -#define CSR_HPMCOUNTER30 0xc1e -#define CSR_HPMCOUNTER31 0xc1f -#define CSR_SSTATUS 0x100 -#define CSR_SIE 0x104 -#define CSR_STVEC 0x105 -#define CSR_SCOUNTEREN 0x106 -#define CSR_SSCRATCH 0x140 -#define CSR_SEPC 0x141 -#define CSR_SCAUSE 0x142 -#define 
CSR_STVAL 0x143 -#define CSR_SIP 0x144 -#define CSR_SATP 0x180 -#define CSR_MSTATUS 0x300 -#define CSR_MISA 0x301 -#define CSR_MEDELEG 0x302 -#define CSR_MIDELEG 0x303 -#define CSR_MIE 0x304 -#define CSR_MTVEC 0x305 -#define CSR_MCOUNTEREN 0x306 -#define CSR_MSCRATCH 0x340 -#define CSR_MEPC 0x341 -#define CSR_MCAUSE 0x342 -#define CSR_MTVAL 0x343 -#define CSR_MIP 0x344 -#define CSR_PMPCFG0 0x3a0 -#define CSR_PMPCFG1 0x3a1 -#define CSR_PMPCFG2 0x3a2 -#define CSR_PMPCFG3 0x3a3 -#define CSR_PMPADDR0 0x3b0 -#define CSR_PMPADDR1 0x3b1 -#define CSR_PMPADDR2 0x3b2 -#define CSR_PMPADDR3 0x3b3 -#define CSR_PMPADDR4 0x3b4 -#define CSR_PMPADDR5 0x3b5 -#define CSR_PMPADDR6 0x3b6 -#define CSR_PMPADDR7 0x3b7 -#define CSR_PMPADDR8 0x3b8 -#define CSR_PMPADDR9 0x3b9 -#define CSR_PMPADDR10 0x3ba -#define CSR_PMPADDR11 0x3bb -#define CSR_PMPADDR12 0x3bc -#define CSR_PMPADDR13 0x3bd -#define CSR_PMPADDR14 0x3be -#define CSR_PMPADDR15 0x3bf -#define CSR_TSELECT 0x7a0 -#define CSR_TDATA1 0x7a1 -#define CSR_TDATA2 0x7a2 -#define CSR_TDATA3 0x7a3 -#define CSR_DCSR 0x7b0 -#define CSR_DPC 0x7b1 -#define CSR_DSCRATCH 0x7b2 -#define CSR_MCYCLE 0xb00 -#define CSR_MINSTRET 0xb02 -#define CSR_MHPMCOUNTER3 0xb03 -#define CSR_MHPMCOUNTER4 0xb04 -#define CSR_MHPMCOUNTER5 0xb05 -#define CSR_MHPMCOUNTER6 0xb06 -#define CSR_MHPMCOUNTER7 0xb07 -#define CSR_MHPMCOUNTER8 0xb08 -#define CSR_MHPMCOUNTER9 0xb09 -#define CSR_MHPMCOUNTER10 0xb0a -#define CSR_MHPMCOUNTER11 0xb0b -#define CSR_MHPMCOUNTER12 0xb0c -#define CSR_MHPMCOUNTER13 0xb0d -#define CSR_MHPMCOUNTER14 0xb0e -#define CSR_MHPMCOUNTER15 0xb0f -#define CSR_MHPMCOUNTER16 0xb10 -#define CSR_MHPMCOUNTER17 0xb11 -#define CSR_MHPMCOUNTER18 0xb12 -#define CSR_MHPMCOUNTER19 0xb13 -#define CSR_MHPMCOUNTER20 0xb14 -#define CSR_MHPMCOUNTER21 0xb15 -#define CSR_MHPMCOUNTER22 0xb16 -#define CSR_MHPMCOUNTER23 0xb17 -#define CSR_MHPMCOUNTER24 0xb18 -#define CSR_MHPMCOUNTER25 0xb19 -#define CSR_MHPMCOUNTER26 0xb1a -#define CSR_MHPMCOUNTER27 0xb1b -#define 
CSR_MHPMCOUNTER28 0xb1c -#define CSR_MHPMCOUNTER29 0xb1d -#define CSR_MHPMCOUNTER30 0xb1e -#define CSR_MHPMCOUNTER31 0xb1f -#define CSR_MHPMEVENT3 0x323 -#define CSR_MHPMEVENT4 0x324 -#define CSR_MHPMEVENT5 0x325 -#define CSR_MHPMEVENT6 0x326 -#define CSR_MHPMEVENT7 0x327 -#define CSR_MHPMEVENT8 0x328 -#define CSR_MHPMEVENT9 0x329 -#define CSR_MHPMEVENT10 0x32a -#define CSR_MHPMEVENT11 0x32b -#define CSR_MHPMEVENT12 0x32c -#define CSR_MHPMEVENT13 0x32d -#define CSR_MHPMEVENT14 0x32e -#define CSR_MHPMEVENT15 0x32f -#define CSR_MHPMEVENT16 0x330 -#define CSR_MHPMEVENT17 0x331 -#define CSR_MHPMEVENT18 0x332 -#define CSR_MHPMEVENT19 0x333 -#define CSR_MHPMEVENT20 0x334 -#define CSR_MHPMEVENT21 0x335 -#define CSR_MHPMEVENT22 0x336 -#define CSR_MHPMEVENT23 0x337 -#define CSR_MHPMEVENT24 0x338 -#define CSR_MHPMEVENT25 0x339 -#define CSR_MHPMEVENT26 0x33a -#define CSR_MHPMEVENT27 0x33b -#define CSR_MHPMEVENT28 0x33c -#define CSR_MHPMEVENT29 0x33d -#define CSR_MHPMEVENT30 0x33e -#define CSR_MHPMEVENT31 0x33f -#define CSR_MVENDORID 0xf11 -#define CSR_MARCHID 0xf12 -#define CSR_MIMPID 0xf13 -#define CSR_MHARTID 0xf14 -#define CSR_CYCLEH 0xc80 -#define CSR_TIMEH 0xc81 -#define CSR_INSTRETH 0xc82 -#define CSR_HPMCOUNTER3H 0xc83 -#define CSR_HPMCOUNTER4H 0xc84 -#define CSR_HPMCOUNTER5H 0xc85 -#define CSR_HPMCOUNTER6H 0xc86 -#define CSR_HPMCOUNTER7H 0xc87 -#define CSR_HPMCOUNTER8H 0xc88 -#define CSR_HPMCOUNTER9H 0xc89 -#define CSR_HPMCOUNTER10H 0xc8a -#define CSR_HPMCOUNTER11H 0xc8b -#define CSR_HPMCOUNTER12H 0xc8c -#define CSR_HPMCOUNTER13H 0xc8d -#define CSR_HPMCOUNTER14H 0xc8e -#define CSR_HPMCOUNTER15H 0xc8f -#define CSR_HPMCOUNTER16H 0xc90 -#define CSR_HPMCOUNTER17H 0xc91 -#define CSR_HPMCOUNTER18H 0xc92 -#define CSR_HPMCOUNTER19H 0xc93 -#define CSR_HPMCOUNTER20H 0xc94 -#define CSR_HPMCOUNTER21H 0xc95 -#define CSR_HPMCOUNTER22H 0xc96 -#define CSR_HPMCOUNTER23H 0xc97 -#define CSR_HPMCOUNTER24H 0xc98 -#define CSR_HPMCOUNTER25H 0xc99 -#define CSR_HPMCOUNTER26H 0xc9a -#define 
CSR_HPMCOUNTER27H 0xc9b -#define CSR_HPMCOUNTER28H 0xc9c -#define CSR_HPMCOUNTER29H 0xc9d -#define CSR_HPMCOUNTER30H 0xc9e -#define CSR_HPMCOUNTER31H 0xc9f -#define CSR_MCYCLEH 0xb80 -#define CSR_MINSTRETH 0xb82 -#define CSR_MHPMCOUNTER3H 0xb83 -#define CSR_MHPMCOUNTER4H 0xb84 -#define CSR_MHPMCOUNTER5H 0xb85 -#define CSR_MHPMCOUNTER6H 0xb86 -#define CSR_MHPMCOUNTER7H 0xb87 -#define CSR_MHPMCOUNTER8H 0xb88 -#define CSR_MHPMCOUNTER9H 0xb89 -#define CSR_MHPMCOUNTER10H 0xb8a -#define CSR_MHPMCOUNTER11H 0xb8b -#define CSR_MHPMCOUNTER12H 0xb8c -#define CSR_MHPMCOUNTER13H 0xb8d -#define CSR_MHPMCOUNTER14H 0xb8e -#define CSR_MHPMCOUNTER15H 0xb8f -#define CSR_MHPMCOUNTER16H 0xb90 -#define CSR_MHPMCOUNTER17H 0xb91 -#define CSR_MHPMCOUNTER18H 0xb92 -#define CSR_MHPMCOUNTER19H 0xb93 -#define CSR_MHPMCOUNTER20H 0xb94 -#define CSR_MHPMCOUNTER21H 0xb95 -#define CSR_MHPMCOUNTER22H 0xb96 -#define CSR_MHPMCOUNTER23H 0xb97 -#define CSR_MHPMCOUNTER24H 0xb98 -#define CSR_MHPMCOUNTER25H 0xb99 -#define CSR_MHPMCOUNTER26H 0xb9a -#define CSR_MHPMCOUNTER27H 0xb9b -#define CSR_MHPMCOUNTER28H 0xb9c -#define CSR_MHPMCOUNTER29H 0xb9d -#define CSR_MHPMCOUNTER30H 0xb9e -#define CSR_MHPMCOUNTER31H 0xb9f -#define CAUSE_MISALIGNED_FETCH 0x0 -#define CAUSE_FETCH_ACCESS 0x1 -#define CAUSE_ILLEGAL_INSTRUCTION 0x2 -#define CAUSE_BREAKPOINT 0x3 -#define CAUSE_MISALIGNED_LOAD 0x4 -#define CAUSE_LOAD_ACCESS 0x5 -#define CAUSE_MISALIGNED_STORE 0x6 -#define CAUSE_STORE_ACCESS 0x7 -#define CAUSE_USER_ECALL 0x8 -#define CAUSE_SUPERVISOR_ECALL 0x9 -#define CAUSE_HYPERVISOR_ECALL 0xa -#define CAUSE_MACHINE_ECALL 0xb -#define CAUSE_FETCH_PAGE_FAULT 0xc -#define CAUSE_LOAD_PAGE_FAULT 0xd -#define CAUSE_STORE_PAGE_FAULT 0xf -#endif -#ifdef DECLARE_INSN -DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) -DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) -DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) -DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) -DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) -DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) -DECLARE_INSN(jalr, 
MATCH_JALR, MASK_JALR) -DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) -DECLARE_INSN(lui, MATCH_LUI, MASK_LUI) -DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) -DECLARE_INSN(addi, MATCH_ADDI, MASK_ADDI) -DECLARE_INSN(slli, MATCH_SLLI, MASK_SLLI) -DECLARE_INSN(slti, MATCH_SLTI, MASK_SLTI) -DECLARE_INSN(sltiu, MATCH_SLTIU, MASK_SLTIU) -DECLARE_INSN(xori, MATCH_XORI, MASK_XORI) -DECLARE_INSN(srli, MATCH_SRLI, MASK_SRLI) -DECLARE_INSN(srai, MATCH_SRAI, MASK_SRAI) -DECLARE_INSN(ori, MATCH_ORI, MASK_ORI) -DECLARE_INSN(andi, MATCH_ANDI, MASK_ANDI) -DECLARE_INSN(add, MATCH_ADD, MASK_ADD) -DECLARE_INSN(sub, MATCH_SUB, MASK_SUB) -DECLARE_INSN(sll, MATCH_SLL, MASK_SLL) -DECLARE_INSN(slt, MATCH_SLT, MASK_SLT) -DECLARE_INSN(sltu, MATCH_SLTU, MASK_SLTU) -DECLARE_INSN(xor, MATCH_XOR, MASK_XOR) -DECLARE_INSN(srl, MATCH_SRL, MASK_SRL) -DECLARE_INSN(sra, MATCH_SRA, MASK_SRA) -DECLARE_INSN(or, MATCH_OR, MASK_OR) -DECLARE_INSN(and, MATCH_AND, MASK_AND) -DECLARE_INSN(addiw, MATCH_ADDIW, MASK_ADDIW) -DECLARE_INSN(slliw, MATCH_SLLIW, MASK_SLLIW) -DECLARE_INSN(srliw, MATCH_SRLIW, MASK_SRLIW) -DECLARE_INSN(sraiw, MATCH_SRAIW, MASK_SRAIW) -DECLARE_INSN(addw, MATCH_ADDW, MASK_ADDW) -DECLARE_INSN(subw, MATCH_SUBW, MASK_SUBW) -DECLARE_INSN(sllw, MATCH_SLLW, MASK_SLLW) -DECLARE_INSN(srlw, MATCH_SRLW, MASK_SRLW) -DECLARE_INSN(sraw, MATCH_SRAW, MASK_SRAW) -DECLARE_INSN(lb, MATCH_LB, MASK_LB) -DECLARE_INSN(lh, MATCH_LH, MASK_LH) -DECLARE_INSN(lw, MATCH_LW, MASK_LW) -DECLARE_INSN(ld, MATCH_LD, MASK_LD) -DECLARE_INSN(lbu, MATCH_LBU, MASK_LBU) -DECLARE_INSN(lhu, MATCH_LHU, MASK_LHU) -DECLARE_INSN(lwu, MATCH_LWU, MASK_LWU) -DECLARE_INSN(sb, MATCH_SB, MASK_SB) -DECLARE_INSN(sh, MATCH_SH, MASK_SH) -DECLARE_INSN(sw, MATCH_SW, MASK_SW) -DECLARE_INSN(sd, MATCH_SD, MASK_SD) -DECLARE_INSN(fence, MATCH_FENCE, MASK_FENCE) -DECLARE_INSN(fence_i, MATCH_FENCE_I, MASK_FENCE_I) -DECLARE_INSN(mul, MATCH_MUL, MASK_MUL) -DECLARE_INSN(mulh, MATCH_MULH, MASK_MULH) -DECLARE_INSN(mulhsu, MATCH_MULHSU, MASK_MULHSU) 
-DECLARE_INSN(mulhu, MATCH_MULHU, MASK_MULHU) -DECLARE_INSN(div, MATCH_DIV, MASK_DIV) -DECLARE_INSN(divu, MATCH_DIVU, MASK_DIVU) -DECLARE_INSN(rem, MATCH_REM, MASK_REM) -DECLARE_INSN(remu, MATCH_REMU, MASK_REMU) -DECLARE_INSN(mulw, MATCH_MULW, MASK_MULW) -DECLARE_INSN(divw, MATCH_DIVW, MASK_DIVW) -DECLARE_INSN(divuw, MATCH_DIVUW, MASK_DIVUW) -DECLARE_INSN(remw, MATCH_REMW, MASK_REMW) -DECLARE_INSN(remuw, MATCH_REMUW, MASK_REMUW) -DECLARE_INSN(amoadd_w, MATCH_AMOADD_W, MASK_AMOADD_W) -DECLARE_INSN(amoxor_w, MATCH_AMOXOR_W, MASK_AMOXOR_W) -DECLARE_INSN(amoor_w, MATCH_AMOOR_W, MASK_AMOOR_W) -DECLARE_INSN(amoand_w, MATCH_AMOAND_W, MASK_AMOAND_W) -DECLARE_INSN(amomin_w, MATCH_AMOMIN_W, MASK_AMOMIN_W) -DECLARE_INSN(amomax_w, MATCH_AMOMAX_W, MASK_AMOMAX_W) -DECLARE_INSN(amominu_w, MATCH_AMOMINU_W, MASK_AMOMINU_W) -DECLARE_INSN(amomaxu_w, MATCH_AMOMAXU_W, MASK_AMOMAXU_W) -DECLARE_INSN(amoswap_w, MATCH_AMOSWAP_W, MASK_AMOSWAP_W) -DECLARE_INSN(lr_w, MATCH_LR_W, MASK_LR_W) -DECLARE_INSN(sc_w, MATCH_SC_W, MASK_SC_W) -DECLARE_INSN(amoadd_d, MATCH_AMOADD_D, MASK_AMOADD_D) -DECLARE_INSN(amoxor_d, MATCH_AMOXOR_D, MASK_AMOXOR_D) -DECLARE_INSN(amoor_d, MATCH_AMOOR_D, MASK_AMOOR_D) -DECLARE_INSN(amoand_d, MATCH_AMOAND_D, MASK_AMOAND_D) -DECLARE_INSN(amomin_d, MATCH_AMOMIN_D, MASK_AMOMIN_D) -DECLARE_INSN(amomax_d, MATCH_AMOMAX_D, MASK_AMOMAX_D) -DECLARE_INSN(amominu_d, MATCH_AMOMINU_D, MASK_AMOMINU_D) -DECLARE_INSN(amomaxu_d, MATCH_AMOMAXU_D, MASK_AMOMAXU_D) -DECLARE_INSN(amoswap_d, MATCH_AMOSWAP_D, MASK_AMOSWAP_D) -DECLARE_INSN(lr_d, MATCH_LR_D, MASK_LR_D) -DECLARE_INSN(sc_d, MATCH_SC_D, MASK_SC_D) -DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL) -DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK) -DECLARE_INSN(uret, MATCH_URET, MASK_URET) -DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) -DECLARE_INSN(mret, MATCH_MRET, MASK_MRET) -DECLARE_INSN(dret, MATCH_DRET, MASK_DRET) -DECLARE_INSN(sfence_vma, MATCH_SFENCE_VMA, MASK_SFENCE_VMA) -DECLARE_INSN(wfi, MATCH_WFI, MASK_WFI) -DECLARE_INSN(csrrw, 
MATCH_CSRRW, MASK_CSRRW) -DECLARE_INSN(csrrs, MATCH_CSRRS, MASK_CSRRS) -DECLARE_INSN(csrrc, MATCH_CSRRC, MASK_CSRRC) -DECLARE_INSN(csrrwi, MATCH_CSRRWI, MASK_CSRRWI) -DECLARE_INSN(csrrsi, MATCH_CSRRSI, MASK_CSRRSI) -DECLARE_INSN(csrrci, MATCH_CSRRCI, MASK_CSRRCI) -DECLARE_INSN(fadd_s, MATCH_FADD_S, MASK_FADD_S) -DECLARE_INSN(fsub_s, MATCH_FSUB_S, MASK_FSUB_S) -DECLARE_INSN(fmul_s, MATCH_FMUL_S, MASK_FMUL_S) -DECLARE_INSN(fdiv_s, MATCH_FDIV_S, MASK_FDIV_S) -DECLARE_INSN(fsgnj_s, MATCH_FSGNJ_S, MASK_FSGNJ_S) -DECLARE_INSN(fsgnjn_s, MATCH_FSGNJN_S, MASK_FSGNJN_S) -DECLARE_INSN(fsgnjx_s, MATCH_FSGNJX_S, MASK_FSGNJX_S) -DECLARE_INSN(fmin_s, MATCH_FMIN_S, MASK_FMIN_S) -DECLARE_INSN(fmax_s, MATCH_FMAX_S, MASK_FMAX_S) -DECLARE_INSN(fsqrt_s, MATCH_FSQRT_S, MASK_FSQRT_S) -DECLARE_INSN(fadd_d, MATCH_FADD_D, MASK_FADD_D) -DECLARE_INSN(fsub_d, MATCH_FSUB_D, MASK_FSUB_D) -DECLARE_INSN(fmul_d, MATCH_FMUL_D, MASK_FMUL_D) -DECLARE_INSN(fdiv_d, MATCH_FDIV_D, MASK_FDIV_D) -DECLARE_INSN(fsgnj_d, MATCH_FSGNJ_D, MASK_FSGNJ_D) -DECLARE_INSN(fsgnjn_d, MATCH_FSGNJN_D, MASK_FSGNJN_D) -DECLARE_INSN(fsgnjx_d, MATCH_FSGNJX_D, MASK_FSGNJX_D) -DECLARE_INSN(fmin_d, MATCH_FMIN_D, MASK_FMIN_D) -DECLARE_INSN(fmax_d, MATCH_FMAX_D, MASK_FMAX_D) -DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D) -DECLARE_INSN(fcvt_d_s, MATCH_FCVT_D_S, MASK_FCVT_D_S) -DECLARE_INSN(fsqrt_d, MATCH_FSQRT_D, MASK_FSQRT_D) -DECLARE_INSN(fadd_q, MATCH_FADD_Q, MASK_FADD_Q) -DECLARE_INSN(fsub_q, MATCH_FSUB_Q, MASK_FSUB_Q) -DECLARE_INSN(fmul_q, MATCH_FMUL_Q, MASK_FMUL_Q) -DECLARE_INSN(fdiv_q, MATCH_FDIV_Q, MASK_FDIV_Q) -DECLARE_INSN(fsgnj_q, MATCH_FSGNJ_Q, MASK_FSGNJ_Q) -DECLARE_INSN(fsgnjn_q, MATCH_FSGNJN_Q, MASK_FSGNJN_Q) -DECLARE_INSN(fsgnjx_q, MATCH_FSGNJX_Q, MASK_FSGNJX_Q) -DECLARE_INSN(fmin_q, MATCH_FMIN_Q, MASK_FMIN_Q) -DECLARE_INSN(fmax_q, MATCH_FMAX_Q, MASK_FMAX_Q) -DECLARE_INSN(fcvt_s_q, MATCH_FCVT_S_Q, MASK_FCVT_S_Q) -DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S) -DECLARE_INSN(fcvt_d_q, MATCH_FCVT_D_Q, 
MASK_FCVT_D_Q) -DECLARE_INSN(fcvt_q_d, MATCH_FCVT_Q_D, MASK_FCVT_Q_D) -DECLARE_INSN(fsqrt_q, MATCH_FSQRT_Q, MASK_FSQRT_Q) -DECLARE_INSN(fle_s, MATCH_FLE_S, MASK_FLE_S) -DECLARE_INSN(flt_s, MATCH_FLT_S, MASK_FLT_S) -DECLARE_INSN(feq_s, MATCH_FEQ_S, MASK_FEQ_S) -DECLARE_INSN(fle_d, MATCH_FLE_D, MASK_FLE_D) -DECLARE_INSN(flt_d, MATCH_FLT_D, MASK_FLT_D) -DECLARE_INSN(feq_d, MATCH_FEQ_D, MASK_FEQ_D) -DECLARE_INSN(fle_q, MATCH_FLE_Q, MASK_FLE_Q) -DECLARE_INSN(flt_q, MATCH_FLT_Q, MASK_FLT_Q) -DECLARE_INSN(feq_q, MATCH_FEQ_Q, MASK_FEQ_Q) -DECLARE_INSN(fcvt_w_s, MATCH_FCVT_W_S, MASK_FCVT_W_S) -DECLARE_INSN(fcvt_wu_s, MATCH_FCVT_WU_S, MASK_FCVT_WU_S) -DECLARE_INSN(fcvt_l_s, MATCH_FCVT_L_S, MASK_FCVT_L_S) -DECLARE_INSN(fcvt_lu_s, MATCH_FCVT_LU_S, MASK_FCVT_LU_S) -DECLARE_INSN(fmv_x_w, MATCH_FMV_X_W, MASK_FMV_X_W) -DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S) -DECLARE_INSN(fcvt_w_d, MATCH_FCVT_W_D, MASK_FCVT_W_D) -DECLARE_INSN(fcvt_wu_d, MATCH_FCVT_WU_D, MASK_FCVT_WU_D) -DECLARE_INSN(fcvt_l_d, MATCH_FCVT_L_D, MASK_FCVT_L_D) -DECLARE_INSN(fcvt_lu_d, MATCH_FCVT_LU_D, MASK_FCVT_LU_D) -DECLARE_INSN(fmv_x_d, MATCH_FMV_X_D, MASK_FMV_X_D) -DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D) -DECLARE_INSN(fcvt_w_q, MATCH_FCVT_W_Q, MASK_FCVT_W_Q) -DECLARE_INSN(fcvt_wu_q, MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q) -DECLARE_INSN(fcvt_l_q, MATCH_FCVT_L_Q, MASK_FCVT_L_Q) -DECLARE_INSN(fcvt_lu_q, MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q) -DECLARE_INSN(fmv_x_q, MATCH_FMV_X_Q, MASK_FMV_X_Q) -DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q) -DECLARE_INSN(fcvt_s_w, MATCH_FCVT_S_W, MASK_FCVT_S_W) -DECLARE_INSN(fcvt_s_wu, MATCH_FCVT_S_WU, MASK_FCVT_S_WU) -DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L) -DECLARE_INSN(fcvt_s_lu, MATCH_FCVT_S_LU, MASK_FCVT_S_LU) -DECLARE_INSN(fmv_w_x, MATCH_FMV_W_X, MASK_FMV_W_X) -DECLARE_INSN(fcvt_d_w, MATCH_FCVT_D_W, MASK_FCVT_D_W) -DECLARE_INSN(fcvt_d_wu, MATCH_FCVT_D_WU, MASK_FCVT_D_WU) -DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L) 
-DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU) -DECLARE_INSN(fmv_d_x, MATCH_FMV_D_X, MASK_FMV_D_X) -DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W) -DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU) -DECLARE_INSN(fcvt_q_l, MATCH_FCVT_Q_L, MASK_FCVT_Q_L) -DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU) -DECLARE_INSN(fmv_q_x, MATCH_FMV_Q_X, MASK_FMV_Q_X) -DECLARE_INSN(flw, MATCH_FLW, MASK_FLW) -DECLARE_INSN(fld, MATCH_FLD, MASK_FLD) -DECLARE_INSN(flq, MATCH_FLQ, MASK_FLQ) -DECLARE_INSN(fsw, MATCH_FSW, MASK_FSW) -DECLARE_INSN(fsd, MATCH_FSD, MASK_FSD) -DECLARE_INSN(fsq, MATCH_FSQ, MASK_FSQ) -DECLARE_INSN(fmadd_s, MATCH_FMADD_S, MASK_FMADD_S) -DECLARE_INSN(fmsub_s, MATCH_FMSUB_S, MASK_FMSUB_S) -DECLARE_INSN(fnmsub_s, MATCH_FNMSUB_S, MASK_FNMSUB_S) -DECLARE_INSN(fnmadd_s, MATCH_FNMADD_S, MASK_FNMADD_S) -DECLARE_INSN(fmadd_d, MATCH_FMADD_D, MASK_FMADD_D) -DECLARE_INSN(fmsub_d, MATCH_FMSUB_D, MASK_FMSUB_D) -DECLARE_INSN(fnmsub_d, MATCH_FNMSUB_D, MASK_FNMSUB_D) -DECLARE_INSN(fnmadd_d, MATCH_FNMADD_D, MASK_FNMADD_D) -DECLARE_INSN(fmadd_q, MATCH_FMADD_Q, MASK_FMADD_Q) -DECLARE_INSN(fmsub_q, MATCH_FMSUB_Q, MASK_FMSUB_Q) -DECLARE_INSN(fnmsub_q, MATCH_FNMSUB_Q, MASK_FNMSUB_Q) -DECLARE_INSN(fnmadd_q, MATCH_FNMADD_Q, MASK_FNMADD_Q) -DECLARE_INSN(c_nop, MATCH_C_NOP, MASK_C_NOP) -DECLARE_INSN(c_addi16sp, MATCH_C_ADDI16SP, MASK_C_ADDI16SP) -DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) -DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) -DECLARE_INSN(c_ebreak, MATCH_C_EBREAK, MASK_C_EBREAK) -DECLARE_INSN(c_ld, MATCH_C_LD, MASK_C_LD) -DECLARE_INSN(c_sd, MATCH_C_SD, MASK_C_SD) -DECLARE_INSN(c_addiw, MATCH_C_ADDIW, MASK_C_ADDIW) -DECLARE_INSN(c_ldsp, MATCH_C_LDSP, MASK_C_LDSP) -DECLARE_INSN(c_sdsp, MATCH_C_SDSP, MASK_C_SDSP) -DECLARE_INSN(c_addi4spn, MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN) -DECLARE_INSN(c_fld, MATCH_C_FLD, MASK_C_FLD) -DECLARE_INSN(c_lw, MATCH_C_LW, MASK_C_LW) -DECLARE_INSN(c_flw, MATCH_C_FLW, MASK_C_FLW) -DECLARE_INSN(c_fsd, MATCH_C_FSD, 
MASK_C_FSD) -DECLARE_INSN(c_sw, MATCH_C_SW, MASK_C_SW) -DECLARE_INSN(c_fsw, MATCH_C_FSW, MASK_C_FSW) -DECLARE_INSN(c_addi, MATCH_C_ADDI, MASK_C_ADDI) -DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) -DECLARE_INSN(c_li, MATCH_C_LI, MASK_C_LI) -DECLARE_INSN(c_lui, MATCH_C_LUI, MASK_C_LUI) -DECLARE_INSN(c_srli, MATCH_C_SRLI, MASK_C_SRLI) -DECLARE_INSN(c_srai, MATCH_C_SRAI, MASK_C_SRAI) -DECLARE_INSN(c_andi, MATCH_C_ANDI, MASK_C_ANDI) -DECLARE_INSN(c_sub, MATCH_C_SUB, MASK_C_SUB) -DECLARE_INSN(c_xor, MATCH_C_XOR, MASK_C_XOR) -DECLARE_INSN(c_or, MATCH_C_OR, MASK_C_OR) -DECLARE_INSN(c_and, MATCH_C_AND, MASK_C_AND) -DECLARE_INSN(c_subw, MATCH_C_SUBW, MASK_C_SUBW) -DECLARE_INSN(c_addw, MATCH_C_ADDW, MASK_C_ADDW) -DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) -DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) -DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) -DECLARE_INSN(c_slli, MATCH_C_SLLI, MASK_C_SLLI) -DECLARE_INSN(c_fldsp, MATCH_C_FLDSP, MASK_C_FLDSP) -DECLARE_INSN(c_lwsp, MATCH_C_LWSP, MASK_C_LWSP) -DECLARE_INSN(c_flwsp, MATCH_C_FLWSP, MASK_C_FLWSP) -DECLARE_INSN(c_mv, MATCH_C_MV, MASK_C_MV) -DECLARE_INSN(c_add, MATCH_C_ADD, MASK_C_ADD) -DECLARE_INSN(c_fsdsp, MATCH_C_FSDSP, MASK_C_FSDSP) -DECLARE_INSN(c_swsp, MATCH_C_SWSP, MASK_C_SWSP) -DECLARE_INSN(c_fswsp, MATCH_C_FSWSP, MASK_C_FSWSP) -DECLARE_INSN(custom0, MATCH_CUSTOM0, MASK_CUSTOM0) -DECLARE_INSN(custom0_rs1, MATCH_CUSTOM0_RS1, MASK_CUSTOM0_RS1) -DECLARE_INSN(custom0_rs1_rs2, MATCH_CUSTOM0_RS1_RS2, MASK_CUSTOM0_RS1_RS2) -DECLARE_INSN(custom0_rd, MATCH_CUSTOM0_RD, MASK_CUSTOM0_RD) -DECLARE_INSN(custom0_rd_rs1, MATCH_CUSTOM0_RD_RS1, MASK_CUSTOM0_RD_RS1) -DECLARE_INSN(custom0_rd_rs1_rs2, MATCH_CUSTOM0_RD_RS1_RS2, MASK_CUSTOM0_RD_RS1_RS2) -DECLARE_INSN(custom1, MATCH_CUSTOM1, MASK_CUSTOM1) -DECLARE_INSN(custom1_rs1, MATCH_CUSTOM1_RS1, MASK_CUSTOM1_RS1) -DECLARE_INSN(custom1_rs1_rs2, MATCH_CUSTOM1_RS1_RS2, MASK_CUSTOM1_RS1_RS2) -DECLARE_INSN(custom1_rd, MATCH_CUSTOM1_RD, MASK_CUSTOM1_RD) -DECLARE_INSN(custom1_rd_rs1, 
MATCH_CUSTOM1_RD_RS1, MASK_CUSTOM1_RD_RS1) -DECLARE_INSN(custom1_rd_rs1_rs2, MATCH_CUSTOM1_RD_RS1_RS2, MASK_CUSTOM1_RD_RS1_RS2) -DECLARE_INSN(custom2, MATCH_CUSTOM2, MASK_CUSTOM2) -DECLARE_INSN(custom2_rs1, MATCH_CUSTOM2_RS1, MASK_CUSTOM2_RS1) -DECLARE_INSN(custom2_rs1_rs2, MATCH_CUSTOM2_RS1_RS2, MASK_CUSTOM2_RS1_RS2) -DECLARE_INSN(custom2_rd, MATCH_CUSTOM2_RD, MASK_CUSTOM2_RD) -DECLARE_INSN(custom2_rd_rs1, MATCH_CUSTOM2_RD_RS1, MASK_CUSTOM2_RD_RS1) -DECLARE_INSN(custom2_rd_rs1_rs2, MATCH_CUSTOM2_RD_RS1_RS2, MASK_CUSTOM2_RD_RS1_RS2) -DECLARE_INSN(custom3, MATCH_CUSTOM3, MASK_CUSTOM3) -DECLARE_INSN(custom3_rs1, MATCH_CUSTOM3_RS1, MASK_CUSTOM3_RS1) -DECLARE_INSN(custom3_rs1_rs2, MATCH_CUSTOM3_RS1_RS2, MASK_CUSTOM3_RS1_RS2) -DECLARE_INSN(custom3_rd, MATCH_CUSTOM3_RD, MASK_CUSTOM3_RD) -DECLARE_INSN(custom3_rd_rs1, MATCH_CUSTOM3_RD_RS1, MASK_CUSTOM3_RD_RS1) -DECLARE_INSN(custom3_rd_rs1_rs2, MATCH_CUSTOM3_RD_RS1_RS2, MASK_CUSTOM3_RD_RS1_RS2) -#endif -#ifdef DECLARE_CSR -DECLARE_CSR(fflags, CSR_FFLAGS) -DECLARE_CSR(frm, CSR_FRM) -DECLARE_CSR(fcsr, CSR_FCSR) -DECLARE_CSR(cycle, CSR_CYCLE) -DECLARE_CSR(time, CSR_TIME) -DECLARE_CSR(instret, CSR_INSTRET) -DECLARE_CSR(hpmcounter3, CSR_HPMCOUNTER3) -DECLARE_CSR(hpmcounter4, CSR_HPMCOUNTER4) -DECLARE_CSR(hpmcounter5, CSR_HPMCOUNTER5) -DECLARE_CSR(hpmcounter6, CSR_HPMCOUNTER6) -DECLARE_CSR(hpmcounter7, CSR_HPMCOUNTER7) -DECLARE_CSR(hpmcounter8, CSR_HPMCOUNTER8) -DECLARE_CSR(hpmcounter9, CSR_HPMCOUNTER9) -DECLARE_CSR(hpmcounter10, CSR_HPMCOUNTER10) -DECLARE_CSR(hpmcounter11, CSR_HPMCOUNTER11) -DECLARE_CSR(hpmcounter12, CSR_HPMCOUNTER12) -DECLARE_CSR(hpmcounter13, CSR_HPMCOUNTER13) -DECLARE_CSR(hpmcounter14, CSR_HPMCOUNTER14) -DECLARE_CSR(hpmcounter15, CSR_HPMCOUNTER15) -DECLARE_CSR(hpmcounter16, CSR_HPMCOUNTER16) -DECLARE_CSR(hpmcounter17, CSR_HPMCOUNTER17) -DECLARE_CSR(hpmcounter18, CSR_HPMCOUNTER18) -DECLARE_CSR(hpmcounter19, CSR_HPMCOUNTER19) -DECLARE_CSR(hpmcounter20, CSR_HPMCOUNTER20) -DECLARE_CSR(hpmcounter21, 
CSR_HPMCOUNTER21) -DECLARE_CSR(hpmcounter22, CSR_HPMCOUNTER22) -DECLARE_CSR(hpmcounter23, CSR_HPMCOUNTER23) -DECLARE_CSR(hpmcounter24, CSR_HPMCOUNTER24) -DECLARE_CSR(hpmcounter25, CSR_HPMCOUNTER25) -DECLARE_CSR(hpmcounter26, CSR_HPMCOUNTER26) -DECLARE_CSR(hpmcounter27, CSR_HPMCOUNTER27) -DECLARE_CSR(hpmcounter28, CSR_HPMCOUNTER28) -DECLARE_CSR(hpmcounter29, CSR_HPMCOUNTER29) -DECLARE_CSR(hpmcounter30, CSR_HPMCOUNTER30) -DECLARE_CSR(hpmcounter31, CSR_HPMCOUNTER31) -DECLARE_CSR(sstatus, CSR_SSTATUS) -DECLARE_CSR(sie, CSR_SIE) -DECLARE_CSR(stvec, CSR_STVEC) -DECLARE_CSR(scounteren, CSR_SCOUNTEREN) -DECLARE_CSR(sscratch, CSR_SSCRATCH) -DECLARE_CSR(sepc, CSR_SEPC) -DECLARE_CSR(scause, CSR_SCAUSE) -DECLARE_CSR(stval, CSR_STVAL) -DECLARE_CSR(sip, CSR_SIP) -DECLARE_CSR(satp, CSR_SATP) -DECLARE_CSR(mstatus, CSR_MSTATUS) -DECLARE_CSR(misa, CSR_MISA) -DECLARE_CSR(medeleg, CSR_MEDELEG) -DECLARE_CSR(mideleg, CSR_MIDELEG) -DECLARE_CSR(mie, CSR_MIE) -DECLARE_CSR(mtvec, CSR_MTVEC) -DECLARE_CSR(mcounteren, CSR_MCOUNTEREN) -DECLARE_CSR(mscratch, CSR_MSCRATCH) -DECLARE_CSR(mepc, CSR_MEPC) -DECLARE_CSR(mcause, CSR_MCAUSE) -DECLARE_CSR(mtval, CSR_MTVAL) -DECLARE_CSR(mip, CSR_MIP) -DECLARE_CSR(pmpcfg0, CSR_PMPCFG0) -DECLARE_CSR(pmpcfg1, CSR_PMPCFG1) -DECLARE_CSR(pmpcfg2, CSR_PMPCFG2) -DECLARE_CSR(pmpcfg3, CSR_PMPCFG3) -DECLARE_CSR(pmpaddr0, CSR_PMPADDR0) -DECLARE_CSR(pmpaddr1, CSR_PMPADDR1) -DECLARE_CSR(pmpaddr2, CSR_PMPADDR2) -DECLARE_CSR(pmpaddr3, CSR_PMPADDR3) -DECLARE_CSR(pmpaddr4, CSR_PMPADDR4) -DECLARE_CSR(pmpaddr5, CSR_PMPADDR5) -DECLARE_CSR(pmpaddr6, CSR_PMPADDR6) -DECLARE_CSR(pmpaddr7, CSR_PMPADDR7) -DECLARE_CSR(pmpaddr8, CSR_PMPADDR8) -DECLARE_CSR(pmpaddr9, CSR_PMPADDR9) -DECLARE_CSR(pmpaddr10, CSR_PMPADDR10) -DECLARE_CSR(pmpaddr11, CSR_PMPADDR11) -DECLARE_CSR(pmpaddr12, CSR_PMPADDR12) -DECLARE_CSR(pmpaddr13, CSR_PMPADDR13) -DECLARE_CSR(pmpaddr14, CSR_PMPADDR14) -DECLARE_CSR(pmpaddr15, CSR_PMPADDR15) -DECLARE_CSR(tselect, CSR_TSELECT) -DECLARE_CSR(tdata1, CSR_TDATA1) 
-DECLARE_CSR(tdata2, CSR_TDATA2) -DECLARE_CSR(tdata3, CSR_TDATA3) -DECLARE_CSR(dcsr, CSR_DCSR) -DECLARE_CSR(dpc, CSR_DPC) -DECLARE_CSR(dscratch, CSR_DSCRATCH) -DECLARE_CSR(mcycle, CSR_MCYCLE) -DECLARE_CSR(minstret, CSR_MINSTRET) -DECLARE_CSR(mhpmcounter3, CSR_MHPMCOUNTER3) -DECLARE_CSR(mhpmcounter4, CSR_MHPMCOUNTER4) -DECLARE_CSR(mhpmcounter5, CSR_MHPMCOUNTER5) -DECLARE_CSR(mhpmcounter6, CSR_MHPMCOUNTER6) -DECLARE_CSR(mhpmcounter7, CSR_MHPMCOUNTER7) -DECLARE_CSR(mhpmcounter8, CSR_MHPMCOUNTER8) -DECLARE_CSR(mhpmcounter9, CSR_MHPMCOUNTER9) -DECLARE_CSR(mhpmcounter10, CSR_MHPMCOUNTER10) -DECLARE_CSR(mhpmcounter11, CSR_MHPMCOUNTER11) -DECLARE_CSR(mhpmcounter12, CSR_MHPMCOUNTER12) -DECLARE_CSR(mhpmcounter13, CSR_MHPMCOUNTER13) -DECLARE_CSR(mhpmcounter14, CSR_MHPMCOUNTER14) -DECLARE_CSR(mhpmcounter15, CSR_MHPMCOUNTER15) -DECLARE_CSR(mhpmcounter16, CSR_MHPMCOUNTER16) -DECLARE_CSR(mhpmcounter17, CSR_MHPMCOUNTER17) -DECLARE_CSR(mhpmcounter18, CSR_MHPMCOUNTER18) -DECLARE_CSR(mhpmcounter19, CSR_MHPMCOUNTER19) -DECLARE_CSR(mhpmcounter20, CSR_MHPMCOUNTER20) -DECLARE_CSR(mhpmcounter21, CSR_MHPMCOUNTER21) -DECLARE_CSR(mhpmcounter22, CSR_MHPMCOUNTER22) -DECLARE_CSR(mhpmcounter23, CSR_MHPMCOUNTER23) -DECLARE_CSR(mhpmcounter24, CSR_MHPMCOUNTER24) -DECLARE_CSR(mhpmcounter25, CSR_MHPMCOUNTER25) -DECLARE_CSR(mhpmcounter26, CSR_MHPMCOUNTER26) -DECLARE_CSR(mhpmcounter27, CSR_MHPMCOUNTER27) -DECLARE_CSR(mhpmcounter28, CSR_MHPMCOUNTER28) -DECLARE_CSR(mhpmcounter29, CSR_MHPMCOUNTER29) -DECLARE_CSR(mhpmcounter30, CSR_MHPMCOUNTER30) -DECLARE_CSR(mhpmcounter31, CSR_MHPMCOUNTER31) -DECLARE_CSR(mhpmevent3, CSR_MHPMEVENT3) -DECLARE_CSR(mhpmevent4, CSR_MHPMEVENT4) -DECLARE_CSR(mhpmevent5, CSR_MHPMEVENT5) -DECLARE_CSR(mhpmevent6, CSR_MHPMEVENT6) -DECLARE_CSR(mhpmevent7, CSR_MHPMEVENT7) -DECLARE_CSR(mhpmevent8, CSR_MHPMEVENT8) -DECLARE_CSR(mhpmevent9, CSR_MHPMEVENT9) -DECLARE_CSR(mhpmevent10, CSR_MHPMEVENT10) -DECLARE_CSR(mhpmevent11, CSR_MHPMEVENT11) -DECLARE_CSR(mhpmevent12, CSR_MHPMEVENT12) 
-DECLARE_CSR(mhpmevent13, CSR_MHPMEVENT13) -DECLARE_CSR(mhpmevent14, CSR_MHPMEVENT14) -DECLARE_CSR(mhpmevent15, CSR_MHPMEVENT15) -DECLARE_CSR(mhpmevent16, CSR_MHPMEVENT16) -DECLARE_CSR(mhpmevent17, CSR_MHPMEVENT17) -DECLARE_CSR(mhpmevent18, CSR_MHPMEVENT18) -DECLARE_CSR(mhpmevent19, CSR_MHPMEVENT19) -DECLARE_CSR(mhpmevent20, CSR_MHPMEVENT20) -DECLARE_CSR(mhpmevent21, CSR_MHPMEVENT21) -DECLARE_CSR(mhpmevent22, CSR_MHPMEVENT22) -DECLARE_CSR(mhpmevent23, CSR_MHPMEVENT23) -DECLARE_CSR(mhpmevent24, CSR_MHPMEVENT24) -DECLARE_CSR(mhpmevent25, CSR_MHPMEVENT25) -DECLARE_CSR(mhpmevent26, CSR_MHPMEVENT26) -DECLARE_CSR(mhpmevent27, CSR_MHPMEVENT27) -DECLARE_CSR(mhpmevent28, CSR_MHPMEVENT28) -DECLARE_CSR(mhpmevent29, CSR_MHPMEVENT29) -DECLARE_CSR(mhpmevent30, CSR_MHPMEVENT30) -DECLARE_CSR(mhpmevent31, CSR_MHPMEVENT31) -DECLARE_CSR(mvendorid, CSR_MVENDORID) -DECLARE_CSR(marchid, CSR_MARCHID) -DECLARE_CSR(mimpid, CSR_MIMPID) -DECLARE_CSR(mhartid, CSR_MHARTID) -DECLARE_CSR(cycleh, CSR_CYCLEH) -DECLARE_CSR(timeh, CSR_TIMEH) -DECLARE_CSR(instreth, CSR_INSTRETH) -DECLARE_CSR(hpmcounter3h, CSR_HPMCOUNTER3H) -DECLARE_CSR(hpmcounter4h, CSR_HPMCOUNTER4H) -DECLARE_CSR(hpmcounter5h, CSR_HPMCOUNTER5H) -DECLARE_CSR(hpmcounter6h, CSR_HPMCOUNTER6H) -DECLARE_CSR(hpmcounter7h, CSR_HPMCOUNTER7H) -DECLARE_CSR(hpmcounter8h, CSR_HPMCOUNTER8H) -DECLARE_CSR(hpmcounter9h, CSR_HPMCOUNTER9H) -DECLARE_CSR(hpmcounter10h, CSR_HPMCOUNTER10H) -DECLARE_CSR(hpmcounter11h, CSR_HPMCOUNTER11H) -DECLARE_CSR(hpmcounter12h, CSR_HPMCOUNTER12H) -DECLARE_CSR(hpmcounter13h, CSR_HPMCOUNTER13H) -DECLARE_CSR(hpmcounter14h, CSR_HPMCOUNTER14H) -DECLARE_CSR(hpmcounter15h, CSR_HPMCOUNTER15H) -DECLARE_CSR(hpmcounter16h, CSR_HPMCOUNTER16H) -DECLARE_CSR(hpmcounter17h, CSR_HPMCOUNTER17H) -DECLARE_CSR(hpmcounter18h, CSR_HPMCOUNTER18H) -DECLARE_CSR(hpmcounter19h, CSR_HPMCOUNTER19H) -DECLARE_CSR(hpmcounter20h, CSR_HPMCOUNTER20H) -DECLARE_CSR(hpmcounter21h, CSR_HPMCOUNTER21H) -DECLARE_CSR(hpmcounter22h, CSR_HPMCOUNTER22H) 
-DECLARE_CSR(hpmcounter23h, CSR_HPMCOUNTER23H) -DECLARE_CSR(hpmcounter24h, CSR_HPMCOUNTER24H) -DECLARE_CSR(hpmcounter25h, CSR_HPMCOUNTER25H) -DECLARE_CSR(hpmcounter26h, CSR_HPMCOUNTER26H) -DECLARE_CSR(hpmcounter27h, CSR_HPMCOUNTER27H) -DECLARE_CSR(hpmcounter28h, CSR_HPMCOUNTER28H) -DECLARE_CSR(hpmcounter29h, CSR_HPMCOUNTER29H) -DECLARE_CSR(hpmcounter30h, CSR_HPMCOUNTER30H) -DECLARE_CSR(hpmcounter31h, CSR_HPMCOUNTER31H) -DECLARE_CSR(mcycleh, CSR_MCYCLEH) -DECLARE_CSR(minstreth, CSR_MINSTRETH) -DECLARE_CSR(mhpmcounter3h, CSR_MHPMCOUNTER3H) -DECLARE_CSR(mhpmcounter4h, CSR_MHPMCOUNTER4H) -DECLARE_CSR(mhpmcounter5h, CSR_MHPMCOUNTER5H) -DECLARE_CSR(mhpmcounter6h, CSR_MHPMCOUNTER6H) -DECLARE_CSR(mhpmcounter7h, CSR_MHPMCOUNTER7H) -DECLARE_CSR(mhpmcounter8h, CSR_MHPMCOUNTER8H) -DECLARE_CSR(mhpmcounter9h, CSR_MHPMCOUNTER9H) -DECLARE_CSR(mhpmcounter10h, CSR_MHPMCOUNTER10H) -DECLARE_CSR(mhpmcounter11h, CSR_MHPMCOUNTER11H) -DECLARE_CSR(mhpmcounter12h, CSR_MHPMCOUNTER12H) -DECLARE_CSR(mhpmcounter13h, CSR_MHPMCOUNTER13H) -DECLARE_CSR(mhpmcounter14h, CSR_MHPMCOUNTER14H) -DECLARE_CSR(mhpmcounter15h, CSR_MHPMCOUNTER15H) -DECLARE_CSR(mhpmcounter16h, CSR_MHPMCOUNTER16H) -DECLARE_CSR(mhpmcounter17h, CSR_MHPMCOUNTER17H) -DECLARE_CSR(mhpmcounter18h, CSR_MHPMCOUNTER18H) -DECLARE_CSR(mhpmcounter19h, CSR_MHPMCOUNTER19H) -DECLARE_CSR(mhpmcounter20h, CSR_MHPMCOUNTER20H) -DECLARE_CSR(mhpmcounter21h, CSR_MHPMCOUNTER21H) -DECLARE_CSR(mhpmcounter22h, CSR_MHPMCOUNTER22H) -DECLARE_CSR(mhpmcounter23h, CSR_MHPMCOUNTER23H) -DECLARE_CSR(mhpmcounter24h, CSR_MHPMCOUNTER24H) -DECLARE_CSR(mhpmcounter25h, CSR_MHPMCOUNTER25H) -DECLARE_CSR(mhpmcounter26h, CSR_MHPMCOUNTER26H) -DECLARE_CSR(mhpmcounter27h, CSR_MHPMCOUNTER27H) -DECLARE_CSR(mhpmcounter28h, CSR_MHPMCOUNTER28H) -DECLARE_CSR(mhpmcounter29h, CSR_MHPMCOUNTER29H) -DECLARE_CSR(mhpmcounter30h, CSR_MHPMCOUNTER30H) -DECLARE_CSR(mhpmcounter31h, CSR_MHPMCOUNTER31H) -#endif -#ifdef DECLARE_CAUSE -DECLARE_CAUSE("misaligned fetch", CAUSE_MISALIGNED_FETCH) 
-DECLARE_CAUSE("fetch access", CAUSE_FETCH_ACCESS) -DECLARE_CAUSE("illegal instruction", CAUSE_ILLEGAL_INSTRUCTION) -DECLARE_CAUSE("breakpoint", CAUSE_BREAKPOINT) -DECLARE_CAUSE("misaligned load", CAUSE_MISALIGNED_LOAD) -DECLARE_CAUSE("load access", CAUSE_LOAD_ACCESS) -DECLARE_CAUSE("misaligned store", CAUSE_MISALIGNED_STORE) -DECLARE_CAUSE("store access", CAUSE_STORE_ACCESS) -DECLARE_CAUSE("user_ecall", CAUSE_USER_ECALL) -DECLARE_CAUSE("supervisor_ecall", CAUSE_SUPERVISOR_ECALL) -DECLARE_CAUSE("hypervisor_ecall", CAUSE_HYPERVISOR_ECALL) -DECLARE_CAUSE("machine_ecall", CAUSE_MACHINE_ECALL) -DECLARE_CAUSE("fetch page fault", CAUSE_FETCH_PAGE_FAULT) -DECLARE_CAUSE("load page fault", CAUSE_LOAD_PAGE_FAULT) -DECLARE_CAUSE("store page fault", CAUSE_STORE_PAGE_FAULT) -#endif diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/link.ld b/benchmarks/riscv-coremark/riscv64-baremetal/link.ld deleted file mode 100644 index 4f8892ee2..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/link.ld +++ /dev/null @@ -1,66 +0,0 @@ -/*======================================================================*/ -/* Proxy kernel linker script */ -/*======================================================================*/ -/* This is the linker script used when building the proxy kernel. */ - -/*----------------------------------------------------------------------*/ -/* Setup */ -/*----------------------------------------------------------------------*/ - -/* The OUTPUT_ARCH command specifies the machine architecture where the - argument is one of the names used in the BFD library. More - specifically one of the entires in bfd/cpu-mips.c */ - -OUTPUT_ARCH( "riscv" ) -ENTRY(_start) - -/*----------------------------------------------------------------------*/ -/* Sections */ -/*----------------------------------------------------------------------*/ - -SECTIONS -{ - - /* text: test code section */ - . = 0x80000000; - .text.init : { *(.text.init) } - - . 
= ALIGN(0x1000); - .tohost : { *(.tohost) } - - .text : { *(.text) } - - /* data segment */ - .data : { *(.data) } - - .sdata : { - __global_pointer$ = . + 0x800; - *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) - *(.sdata .sdata.* .gnu.linkonce.s.*) - } - - /* bss segment */ - .sbss : { - *(.sbss .sbss.* .gnu.linkonce.sb.*) - *(.scommon) - } - .bss : { *(.bss) } - - /* thread-local data segment */ - .tdata : - { - _tls_data = .; - *(.tdata.begin) - *(.tdata) - *(.tdata.end) - } - .tbss : - { - *(.tbss) - *(.tbss.end) - } - - /* End of uninitalized data segement */ - _end = .; -} - diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/syscallbackup.c b/benchmarks/riscv-coremark/riscv64-baremetal/syscallbackup.c deleted file mode 100644 index e4322563c..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/syscallbackup.c +++ /dev/null @@ -1,1072 +0,0 @@ -// See LICENSE for license details. - -#include -#include -#include -#include -#include -#include -#include "util.h" -#undef printf -#define SYS_write 64 -#define ZEROPAD (1<<0) /* Pad with zero */ -#define SIGN (1<<1) /* Unsigned/signed long */ -#define PLUS (1<<2) /* Show plus */ -#define SPACE (1<<3) /* Spacer */ -#define LEFT (1<<4) /* Left justified */ -#define HEX_PREP (1<<5) /* 0x */ -#define UPPERCASE (1<<6) /* 'ABCDEF' */ -typedef size_t ee_size_t; -#define is_digit(c) ((c) >= '0' && (c) <= '9') -/*static ee_size_t strnlen(const char *s, ee_size_t count);*/ -#undef strcmp -static char *digits = "0123456789abcdefghijklmnopqrstuvwxyz"; -static char *upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -char *ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); -char *fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); -static void ee_bufcpy(char *d, char *s, int count); -extern volatile uint64_t tohost; -extern volatile uint64_t fromhost; -ee_size_t strnlen(const char *s, ee_size_t count) -{ - const char *sc; - for (sc = s; *sc != 
'\0' && count--; ++sc); - return sc - s; -} -static char *number(char *str, long num, int base, int size, int precision, int type) -{ - char c, sign, tmp[66]; - char *dig = digits; - int i; - - if (type & UPPERCASE) dig = upper_digits; - if (type & LEFT) type &= ~ZEROPAD; - if (base < 2 || base > 36) return 0; - - c = (type & ZEROPAD) ? '0' : ' '; - sign = 0; - if (type & SIGN) - { - if (num < 0) - { - sign = '-'; - num = -num; - size--; - } - else if (type & PLUS) - { - sign = '+'; - size--; - } - else if (type & SPACE) - { - sign = ' '; - size--; - } - } - - if (type & HEX_PREP) - { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - - i = 0; - - if (num == 0) - tmp[i++] = '0'; - else - { - while (num != 0) - { - tmp[i++] = dig[((unsigned long) num) % (unsigned) base]; - num = ((unsigned long) num) / (unsigned) base; - } - } - - if (i > precision) precision = i; - size -= precision; - if (!(type & (ZEROPAD | LEFT))) while (size-- > 0) *str++ = ' '; - if (sign) *str++ = sign; - - if (type & HEX_PREP) - { - if (base == 8) - *str++ = '0'; - else if (base == 16) - { - *str++ = '0'; - *str++ = digits[33]; - } - } - - if (!(type & LEFT)) while (size-- > 0) *str++ = c; - while (i < precision--) *str++ = '0'; - while (i-- > 0) *str++ = tmp[i]; - while (size-- > 0) *str++ = ' '; - - return str; -} - -static char *eaddr(char *str, unsigned char *addr, int size, int precision, int type) -{ - char tmp[24]; - char *dig = digits; - int i, len; - - if (type & UPPERCASE) dig = upper_digits; - len = 0; - for (i = 0; i < 6; i++) - { - if (i != 0) tmp[len++] = ':'; - tmp[len++] = dig[addr[i] >> 4]; - tmp[len++] = dig[addr[i] & 0x0F]; - } - - if (!(type & LEFT)) while (len < size--) *str++ = ' '; - for (i = 0; i < len; ++i) *str++ = tmp[i]; - while (len < size--) *str++ = ' '; - - return str; -} -static int skip_atoi(const char **s) -{ - int i = 0; - while (is_digit(**s)) i = i*10 + *((*s)++) - '0'; - return i; -} -static char *iaddr(char *str, unsigned char *addr, 
int size, int precision, int type) -{ - char tmp[24]; - int i, n, len; - - len = 0; - for (i = 0; i < 4; i++) - { - if (i != 0) tmp[len++] = '.'; - n = addr[i]; - - if (n == 0) - tmp[len++] = digits[0]; - else - { - if (n >= 100) - { - tmp[len++] = digits[n / 100]; - n = n % 100; - tmp[len++] = digits[n / 10]; - n = n % 10; - } - else if (n >= 10) - { - tmp[len++] = digits[n / 10]; - n = n % 10; - } - - tmp[len++] = digits[n]; - } - } - - if (!(type & LEFT)) while (len < size--) *str++ = ' '; - for (i = 0; i < len; ++i) *str++ = tmp[i]; - while (len < size--) *str++ = ' '; - - return str; -} - -void ee_bufcpy(char *pd, char *ps, int count) { - char *pe=ps+count; - while (ps!=pe) - *pd++=*ps++; -} - -#if HAS_FLOAT - - - -static void parse_float(double value, char *buffer, char fmt, int precision) -{ - int decpt, sign, exp, pos; - char *digits = NULL; - char cvtbuf[80]; - int capexp = 0; - int magnitude; - - if (fmt == 'G' || fmt == 'E') - { - capexp = 1; - fmt += 'a' - 'A'; - } - - if (fmt == 'g') - { - digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf); - magnitude = decpt - 1; - if (magnitude < -4 || magnitude > precision - 1) - { - fmt = 'e'; - precision -= 1; - } - else - { - fmt = 'f'; - precision -= decpt; - } - } - - if (fmt == 'e') - { - digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf); - - if (sign) *buffer++ = '-'; - *buffer++ = *digits; - if (precision > 0) *buffer++ = '.'; - ee_bufcpy(buffer, digits + 1, precision); - buffer += precision; - *buffer++ = capexp ? 
'E' : 'e'; - - if (decpt == 0) - { - if (value == 0.0) - exp = 0; - else - exp = -1; - } - else - exp = decpt - 1; - - if (exp < 0) - { - *buffer++ = '-'; - exp = -exp; - } - else - *buffer++ = '+'; - - buffer[2] = (exp % 10) + '0'; - exp = exp / 10; - buffer[1] = (exp % 10) + '0'; - exp = exp / 10; - buffer[0] = (exp % 10) + '0'; - buffer += 3; - } - else if (fmt == 'f') - { - digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf); - if (sign) *buffer++ = '-'; - if (*digits) - { - if (decpt <= 0) - { - *buffer++ = '0'; - *buffer++ = '.'; - for (pos = 0; pos < -decpt; pos++) *buffer++ = '0'; - while (*digits) *buffer++ = *digits++; - } - else - { - pos = 0; - while (*digits) - { - if (pos++ == decpt) *buffer++ = '.'; - *buffer++ = *digits++; - } - } - } - else - { - *buffer++ = '0'; - if (precision > 0) - { - *buffer++ = '.'; - for (pos = 0; pos < precision; pos++) *buffer++ = '0'; - } - } - } - - *buffer = '\0'; -} - - -static char *flt(char *str, double num, int size, int precision, char fmt, int flags) -{ - char tmp[80]; - char c, sign; - int n, i; - - // Left align means no zero padding - if (flags & LEFT) flags &= ~ZEROPAD; - - // Determine padding and sign char - c = (flags & ZEROPAD) ? 
'0' : ' '; - sign = 0; - if (flags & SIGN) - { - if (num < 0.0) - { - sign = '-'; - num = -num; - size--; - } - else if (flags & PLUS) - { - sign = '+'; - size--; - } - else if (flags & SPACE) - { - sign = ' '; - size--; - } - } - - // Compute the precision value - if (precision < 0) - precision = 6; // Default precision: 6 - - // Convert floating point number to text - parse_float(num, tmp, fmt, precision); - - if ((flags & HEX_PREP) && precision == 0) decimal_point(tmp); - if (fmt == 'g' && !(flags & HEX_PREP)) cropzeros(tmp); - - n = strnlen(tmp,256); - - // Output number with alignment and padding - size -= n; - if (!(flags & (ZEROPAD | LEFT))) while (size-- > 0) *str++ = ' '; - if (sign) *str++ = sign; - if (!(flags & LEFT)) while (size-- > 0) *str++ = c; - for (i = 0; i < n; i++) *str++ = tmp[i]; - while (size-- > 0) *str++ = ' '; - - return str; -} - - -#endif -static void decimal_point(char *buffer) -{ - while (*buffer) - { - if (*buffer == '.') return; - if (*buffer == 'e' || *buffer == 'E') break; - buffer++; - } - - if (*buffer) - { - int n = strnlen(buffer,256); - while (n > 0) - { - buffer[n + 1] = buffer[n]; - n--; - } - - *buffer = '.'; - } - else - { - *buffer++ = '.'; - *buffer = '\0'; - } -} - -static void cropzeros(char *buffer) -{ - char *stop; - - while (*buffer && *buffer != '.') buffer++; - if (*buffer++) - { - while (*buffer && *buffer != 'e' && *buffer != 'E') buffer++; - stop = buffer--; - while (*buffer == '0') buffer--; - if (*buffer == '.') buffer--; - while (buffer!=stop) - *++buffer=0; - } -} - -static int ee_vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long num; - int i, base; - char *str; - char *s; - - int flags; // Flags to number() - - int field_width; // Width of output field - int precision; // Min. 
# of digits for integers; max number of chars for from string - int qualifier; // 'h', 'l', or 'L' for integer fields - - for (str = buf; *fmt; fmt++) - { - if (*fmt != '%') - { - *str++ = *fmt; - continue; - } - - // Process flags - flags = 0; -repeat: - fmt++; // This also skips first '%' - switch (*fmt) - { - case '-': flags |= LEFT; goto repeat; - case '+': flags |= PLUS; goto repeat; - case ' ': flags |= SPACE; goto repeat; - case '#': flags |= HEX_PREP; goto repeat; - case '0': flags |= ZEROPAD; goto repeat; - } - - // Get field width - field_width = -1; - if (is_digit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') - { - fmt++; - field_width = va_arg(args, int); - if (field_width < 0) - { - field_width = -field_width; - flags |= LEFT; - } - } - - // Get the precision - precision = -1; - if (*fmt == '.') - { - ++fmt; - if (is_digit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') - { - ++fmt; - precision = va_arg(args, int); - } - if (precision < 0) precision = 0; - } - - // Get the conversion qualifier - qualifier = -1; - if (*fmt == 'l' || *fmt == 'L') - { - qualifier = *fmt; - fmt++; - } - - // Default base - base = 10; - - switch (*fmt) - { - case 'c': - if (!(flags & LEFT)) while (--field_width > 0) *str++ = ' '; - *str++ = (unsigned char) va_arg(args, int); - while (--field_width > 0) *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - if (!s) s = ""; - len = strnlen(s, precision); - if (!(flags & LEFT)) while (len < field_width--) *str++ = ' '; - for (i = 0; i < len; ++i) *str++ = *s++; - while (len < field_width--) *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) - { - field_width = 2 * sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, (unsigned long) va_arg(args, void *), 16, field_width, precision, flags); - continue; - - case 'A': - flags |= UPPERCASE; - - case 'a': - if (qualifier == 'l') - str = eaddr(str, va_arg(args, unsigned char *), field_width, precision, flags); - else 
- str = iaddr(str, va_arg(args, unsigned char *), field_width, precision, flags); - continue; - - // Integer number formats - set up the flags and "break" - case 'o': - base = 8; - break; - - case 'X': - flags |= UPPERCASE; - - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - - case 'u': - break; - -#if HAS_FLOAT - - case 'f': - str = flt(str, va_arg(args, double), field_width, precision, *fmt, flags | SIGN); - continue; - -#endif - - default: - if (*fmt != '%') *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - - if (qualifier == 'l') - num = va_arg(args, unsigned long); - else if (flags & SIGN) - num = va_arg(args, int); - else - num = va_arg(args, unsigned int); - - str = number(str, num, base, field_width, precision, flags); - } - - *str = '\0'; - return str - buf; -} - -static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2) -{ - volatile uint64_t magic_mem[8] __attribute__((aligned(64))); - magic_mem[0] = which; - magic_mem[1] = arg0; - magic_mem[2] = arg1; - magic_mem[3] = arg2; - __sync_synchronize(); - - tohost = (uintptr_t)magic_mem; - while (fromhost == 0) - ; - fromhost = 0; - - __sync_synchronize(); - return magic_mem[0]; -} - -#define NUM_COUNTERS 2 -static uintptr_t counters[NUM_COUNTERS]; -static char* counter_names[NUM_COUNTERS]; - -void setStats(int enable) -{ - int i = 0; -#define READ_CTR(name) do { \ - while (i >= NUM_COUNTERS) ; \ - uintptr_t csr = read_csr(name); \ - if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \ - counters[i++] = csr; \ - } while (0) - - READ_CTR(mcycle); - READ_CTR(minstret); - -#undef READ_CTR -} - -/*void __attribute__((noreturn)) tohost_exit(uintptr_t code) -{ - tohost = (code << 1) | 1; - while (1); -}*/ -void __attribute__((noreturn))tohost_exit(uintptr_t code){ - tohost=(code<<1)|1; - asm ("ecall"); - } - - -uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32]) -{ - 
tohost_exit(1337); -} - -void exit(int code) -{ - tohost_exit(code); -} - -void abort() -{ - exit(128 + SIGABRT); -} - -void printstr(const char* s) -{ - syscall(SYS_write, 1, (uintptr_t)s, strlen(s)); -} - -void __attribute__((weak)) thread_entry(int cid, int nc) -{ - // multi-threaded programs override this function. - // for the case of single-threaded programs, only let core 0 proceed. - while (cid != 0); -} - -int __attribute__((weak)) main(int argc, char** argv) -{ - // single-threaded programs override this function. - printstr("Implement main(), foo!\n"); - return -1; -} - -static void init_tls() -{ - register void* thread_pointer asm("tp"); - extern char _tls_data; - extern __thread char _tdata_begin, _tdata_end, _tbss_end; - size_t tdata_size = &_tdata_end - &_tdata_begin; - memcpy(thread_pointer, &_tls_data, tdata_size); - size_t tbss_size = &_tbss_end - &_tdata_end; - memset(thread_pointer + tdata_size, 0, tbss_size); -} - -void _init(int cid, int nc) -{ - init_tls(); - thread_entry(cid, nc); - - // only single-threaded programs should ever get here. - int ret = main(0, 0); - - char buf[NUM_COUNTERS * 32] __attribute__((aligned(64))); - char* pbuf = buf; - for (int i = 0; i < NUM_COUNTERS; i++) - if (counters[i]) - pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]); - if (pbuf != buf) - printstr(buf); - - exit(ret); -} - -#undef putchar -int putchar(int ch) -{ - static __thread char buf[64] __attribute__((aligned(64))); - static __thread int buflen = 0; - - buf[buflen++] = ch; - - if (ch == '\n' || buflen == sizeof(buf)) - { - syscall(SYS_write, 1, (uintptr_t)buf, buflen); - buflen = 0; - } - - return 0; -} - -void printhex(uint64_t x) -{ - char str[17]; - int i; - for (i = 0; i < 16; i++) - { - str[15-i] = (x & 0xF) + ((x & 0xF) < 10 ? 
'0' : 'a'-10); - x >>= 4; - } - str[16] = 0; - - printstr(str); -} - -static inline void printnum(void (*putch)(int, void**), void **putdat, - unsigned long long num, unsigned base, int width, int padc) -{ - unsigned digs[sizeof(num)*CHAR_BIT]; - int pos = 0; - - while (1) - { - digs[pos++] = num % base; - if (num < base) - break; - num /= base; - } - - while (width-- > pos) - putch(padc, putdat); - - while (pos-- > 0) - putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat); -} - -static unsigned long long getuint(va_list *ap, int lflag) -{ - if (lflag >= 2) - return va_arg(*ap, unsigned long long); - else if (lflag) - return va_arg(*ap, unsigned long); - else - return va_arg(*ap, unsigned int); -} - -static long long getint(va_list *ap, int lflag) -{ - if (lflag >= 2) - return va_arg(*ap, long long); - else if (lflag) - return va_arg(*ap, long); - else - return va_arg(*ap, int); -} - -static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap) -{ - register const char* p; - const char* last_fmt; - register int ch, err; - unsigned long long num; - int base, lflag, width, precision, altflag; - char padc; - - while (1) { - while ((ch = *(unsigned char *) fmt) != '%') { - if (ch == '\0') - return; - fmt++; - putch(ch, putdat); - } - fmt++; - - // Process a %-escape sequence - last_fmt = fmt; - padc = ' '; - width = -1; - precision = -1; - lflag = 0; - altflag = 0; - reswitch: - switch (ch = *(unsigned char *) fmt++) { - - // flag to pad on the right - case '-': - padc = '-'; - goto reswitch; - - // flag to pad with 0's instead of spaces - case '0': - padc = '0'; - goto reswitch; - - // width field - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - for (precision = 0; ; ++fmt) { - precision = precision * 10 + ch - '0'; - ch = *fmt; - if (ch < '0' || ch > '9') - break; - } - goto process_precision; - - case '*': - precision = va_arg(ap, int); - goto process_precision; - 
- case '.': - if (width < 0) - width = 0; - goto reswitch; - - case '#': - altflag = 1; - goto reswitch; - - process_precision: - if (width < 0) - width = precision, precision = -1; - goto reswitch; - - // long flag (doubled for long long) - case 'l': - lflag++; - goto reswitch; - - // character - case 'c': - putch(va_arg(ap, int), putdat); - break; - - // string - case 's': - if ((p = va_arg(ap, char *)) == NULL) - p = "(null)"; - if (width > 0 && padc != '-') - for (width -= strnlen(p, precision); width > 0; width--) - putch(padc, putdat); - for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) { - putch(ch, putdat); - p++; - } - for (; width > 0; width--) - putch(' ', putdat); - break; - - // (signed) decimal - case 'd': - num = getint(&ap, lflag); - if ((long long) num < 0) { - putch('-', putdat); - num = -(long long) num; - } - base = 10; - goto signed_number; - - // unsigned decimal - case 'u': - base = 10; - goto unsigned_number; - - // (unsigned) octal - case 'o': - // should do something with padding so it's always 3 octits - base = 8; - goto unsigned_number; - - // pointer - case 'p': - static_assert(sizeof(long) == sizeof(void*)); - lflag = 1; - putch('0', putdat); - putch('x', putdat); - /* fall through to 'x' */ - - // (unsigned) hexadecimal - case 'X': - case 'x': - base = 16; - unsigned_number: - num = getuint(&ap, lflag); - signed_number: - printnum(putch, putdat, num, base, width, padc); - break; - - // escaped '%' character - case '%': - putch(ch, putdat); - break; - - // unrecognized escape sequence - just print it literally - default: - putch('%', putdat); - fmt = last_fmt; - break; - } - } -} -/* -int printf(const char* fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - - vprintfmt((void*)putchar, 0, fmt, ap); - - va_end(ap); - return 0; // incorrect return value, but who cares, anyway? 
-}*/ - - -void _send_char(char c) { -/*#error "You must implement the method _send_char to use this file!\n"; -*/ -volatile unsigned char *THR=(unsigned char *)0x10000000; -volatile unsigned char *LSR=(unsigned char *)0x10000005; - -while(!(*LSR&0b100000)); -*THR=c; -while(!(*LSR&0b100000)); -} - - -int sprintf(char* str, const char* fmt, ...) -{ - va_list ap; - char* str0 = str; - va_start(ap, fmt); - - void sprintf_putch(int ch, void** data) - { - char** pstr = (char**)data; - **pstr = ch; - (*pstr)++; - } - - vprintfmt(sprintf_putch, (void**)&str, fmt, ap); - *str = 0; - - va_end(ap); - return str - str0; -} - -void* memcpy(void* dest, const void* src, size_t len) -{ - if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) { - const uintptr_t* s = src; - uintptr_t *d = dest; - while (d < (uintptr_t*)(dest + len)) - *d++ = *s++; - } else { - const char* s = src; - char *d = dest; - while (d < (char*)(dest + len)) - *d++ = *s++; - } - return dest; -} - -void* memset(void* dest, int byte, size_t len) -{ - if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) { - uintptr_t word = byte & 0xFF; - word |= word << 8; - word |= word << 16; - word |= word << 16 << 16; - - uintptr_t *d = dest; - while (d < (uintptr_t*)(dest + len)){ - *d = word; - d++;} - } else { - char *d = dest; - while (d < (char*)(dest + len)){ - *d = byte; - d++;} - } - return dest; -} -//recompile pls -size_t strlen(const char *s) -{ - const char *p = s; - while (*p) - p++; - return p - s; -} - -/*size_t strnlen(const char *s, size_t n) -{ - const char *p = s; - while (n-- && *p) - p++; - return p - s; -}*/ - -int strcmp(const char* s1, const char* s2) -{ - unsigned char c1, c2; - - do { - c1 = *s1++; - c2 = *s2++; - } while (c1 != 0 && c1 == c2); - - return c1 - c2; -} - -char* strcpy(char* dest, const char* src) -{ - char* d = dest; - while ((*d++ = *src++)) - ; - return dest; -} - -long atol(const char* str) -{ - long res = 0; - int sign = 0; - - while (*str == ' ') - 
str++; - - if (*str == '-' || *str == '+') { - sign = *str == '-'; - str++; - } - - while (*str) { - res *= 10; - res += *str++ - '0'; - } - - return sign ? -res : res; -} - -int sendstring(const char *p){ - int n=0; - while (*p) { - _send_char(*p); - n++; - p++; - } - - return n; -} -int gg_printf(const char *fmt, ...) -{ - char buf[256],*p; - va_list args; - int n=0; - - va_start(args, fmt); - ee_vsprintf(buf, fmt, args); - va_end(args); - p=buf; - /* while (*p) { - _send_char(*p); - n++; - p++; - } -*/ -n=sendstring(p); - return n; -} - - -int puts(const char* s) -{ - gg_printf(s); - gg_printf("\n"); - return 0; // incorrect return value, but who cares, anyway? -} - -unsigned long getTimer(void){ - unsigned long *MTIME = (unsigned long*)0x0200BFF8; - return *MTIME; - -} diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/syscalls.c b/benchmarks/riscv-coremark/riscv64-baremetal/syscalls.c deleted file mode 100644 index 185391236..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/syscalls.c +++ /dev/null @@ -1,540 +0,0 @@ -// See LICENSE for license details. 
- -#include -#include -#include -#include -#include -#include -#include "util.h" -#include "coremark.h" -#include - -#define SYS_write 64 - -#undef strcmp - -extern volatile uint64_t tohost; -extern volatile uint64_t fromhost; - - -void _send_char(char c) { -/*#error "You must implement the method _send_char to use this file!\n"; -*/ -volatile unsigned char *THR=(unsigned char *)0x10000000; -volatile unsigned char *LSR=(unsigned char *)0x10000005; - -while(!(*LSR&0b100000)); -*THR=c; -while(!(*LSR&0b100000)); -} - -int sendstring(const char *p){ - int n=0; - while (*p) { - _send_char(*p); - n++; - p++; - } - - return n; -} - -static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2) -{ - volatile uint64_t magic_mem[8] __attribute__((aligned(64))); - magic_mem[0] = which; - magic_mem[1] = arg0; - magic_mem[2] = arg1; - magic_mem[3] = arg2; - __sync_synchronize(); - - tohost = (uintptr_t)magic_mem; - while (fromhost == 0) - ; - fromhost = 0; - - __sync_synchronize(); - return magic_mem[0]; -} - -#define NUM_COUNTERS 3 -static uintptr_t counters[NUM_COUNTERS]; -static char* counter_names[NUM_COUNTERS]; - -void setStats(int enable) -{ - int i = 0; -#define READ_CTR(name) do { \ - while (i >= NUM_COUNTERS) ; \ - uintptr_t csr = read_csr(name); \ - if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \ - counters[i++] = csr; \ - } while (0) - - READ_CTR(mcycle); - READ_CTR(minstret); - READ_CTR(mhpmcounter3); - READ_CTR(mhpmcounter4); - READ_CTR(mhpmcounter5); - READ_CTR(mhpmcounter6); - READ_CTR(mhpmcounter7); - READ_CTR(mhpmcounter8); - READ_CTR(mhpmcounter9); - READ_CTR(mhpmcounter10); - READ_CTR(mhpmcounter11); - READ_CTR(mhpmcounter12); - -#undef READ_CTR -} - -void __attribute__((noreturn)) tohost_exit(uintptr_t code) -{ - tohost = (code << 1) | 1; - asm ("ecall"); - exit(0); -} - -uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32]) -{ - tohost_exit(1337); -} - -void exit(int code) 
-{ - tohost_exit(code); -} - -void abort() -{ - exit(128 + SIGABRT); -} - -void printstr(const char* s) -{ - syscall(SYS_write, 1, (uintptr_t)s, strlen(s)); -} - -void __attribute__((weak)) thread_entry(int cid, int nc) -{ - // multi-threaded programs override this function. - // for the case of single-threaded programs, only let core 0 proceed. - while (cid != 0); -} - -int __attribute__((weak)) main(int argc, char** argv) -{ - // single-threaded programs override this function. - printstr("Implement main(), foo!\n"); - return -1; -} - -static void init_tls() -{ - register void* thread_pointer asm("tp"); - extern char _tls_data; - extern __thread char _tdata_begin, _tdata_end, _tbss_end; - size_t tdata_size = &_tdata_end - &_tdata_begin; - memcpy(thread_pointer, &_tls_data, tdata_size); - size_t tbss_size = &_tbss_end - &_tdata_end; - memset(thread_pointer + tdata_size, 0, tbss_size); -} - -void _init(int cid, int nc) -{ - init_tls(); - thread_entry(cid, nc); - - // only single-threaded programs should ever get here. 
- int ret = main(0, 0); - - char buf[NUM_COUNTERS * 32] __attribute__((aligned(64))); - char* pbuf = buf; - for (int i = 0; i < NUM_COUNTERS; i++) - if (counters[i]) - pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]); - if (pbuf != buf) - printstr(buf); - counters[3] = read_csr(mhpmcounter3) - counters[3]; - counters[4] = read_csr(mhpmcounter4) - counters[4]; - counters[5] = read_csr(mhpmcounter5) - counters[5]; - counters[6] = read_csr(mhpmcounter6) - counters[6]; - counters[7] = read_csr(mhpmcounter7) - counters[7]; - counters[8] = read_csr(mhpmcounter8) - counters[8]; - counters[9] = read_csr(mhpmcounter9) - counters[9]; - counters[10] = read_csr(mhpmcounter10) - counters[10]; - counters[11] = read_csr(mhpmcounter11) - counters[11]; - counters[12] = read_csr(mhpmcounter12) - counters[12]; - - ee_printf("Load Stalls : %d\n", counters[3]); - ee_printf("D-Cache Accesses : %d\n", counters[11]); - ee_printf("D-Cache Misses : %d\n", counters[12]); - ee_printf("Branches : %d\n", counters[5]); - ee_printf("Branches Miss Predictions : %d\n", counters[4]); - ee_printf("BTB Misses : %d\n", counters[6]); - ee_printf("Jump, JAL, JALR : %d\n", counters[7]); - ee_printf("RAS Wrong : %d\n", counters[8]); - ee_printf("Returns : %d\n", counters[9]); - ee_printf("BP Class Wrong : %d\n", counters[10]); - ee_printf("Done printing performance counters : \n"); - - exit(ret); -} - -#undef putchar -int putchar(int ch) -{ - /*static __thread char buf[64] __attribute__((aligned(64))); - static __thread int buflen = 0; - - buf[buflen++] = ch; - - if (ch == '\n' || buflen == sizeof(buf)) - { - syscall(SYS_write, 1, (uintptr_t)buf, buflen); - buflen = 0; - } - - return 0;*/ - _send_char(ch); - return 0; - -} - -void printhex(uint64_t x) -{ - char str[17]; - int i; - for (i = 0; i < 16; i++) - { - str[15-i] = (x & 0xF) + ((x & 0xF) < 10 ? 
'0' : 'a'-10); - x >>= 4; - } - str[16] = 0; - - printstr(str); -} - -static inline void printnum(void (*putch)(int, void**), void **putdat, - unsigned long long num, unsigned base, int width, int padc) -{ - unsigned digs[sizeof(num)*CHAR_BIT]; - int pos = 0; - - while (1) - { - digs[pos++] = num % base; - if (num < base) - break; - num /= base; - } - - while (width-- > pos) - putch(padc, putdat); - - while (pos-- > 0) - putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat); -} - -static unsigned long long getuint(va_list *ap, int lflag) -{ - if (lflag >= 2) - return va_arg(*ap, unsigned long long); - else if (lflag) - return va_arg(*ap, unsigned long); - else - return va_arg(*ap, unsigned int); -} - -static long long getint(va_list *ap, int lflag) -{ - if (lflag >= 2) - return va_arg(*ap, long long); - else if (lflag) - return va_arg(*ap, long); - else - return va_arg(*ap, int); -} - -static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap) -{ - register const char* p; - const char* last_fmt; - register int ch, err; - unsigned long long num; - int base, lflag, width, precision, altflag; - char padc; - - while (1) { - while ((ch = *(unsigned char *) fmt) != '%') { - if (ch == '\0') - return; - fmt++; - putch(ch, putdat); - } - fmt++; - - // Process a %-escape sequence - last_fmt = fmt; - padc = ' '; - width = -1; - precision = -1; - lflag = 0; - altflag = 0; - reswitch: - switch (ch = *(unsigned char *) fmt++) { - - // flag to pad on the right - case '-': - padc = '-'; - goto reswitch; - - // flag to pad with 0's instead of spaces - case '0': - padc = '0'; - goto reswitch; - - // width field - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - for (precision = 0; ; ++fmt) { - precision = precision * 10 + ch - '0'; - ch = *fmt; - if (ch < '0' || ch > '9') - break; - } - goto process_precision; - - case '*': - precision = va_arg(ap, int); - goto process_precision; - 
- case '.': - if (width < 0) - width = 0; - goto reswitch; - - case '#': - altflag = 1; - goto reswitch; - - process_precision: - if (width < 0) - width = precision, precision = -1; - goto reswitch; - - // long flag (doubled for long long) - case 'l': - lflag++; - goto reswitch; - - // character - case 'c': - putch(va_arg(ap, int), putdat); - break; - - // string - case 's': - if ((p = va_arg(ap, char *)) == NULL) - p = "(null)"; - if (width > 0 && padc != '-') - for (width -= strnlen(p, precision); width > 0; width--) - putch(padc, putdat); - for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) { - putch(ch, putdat); - p++; - } - for (; width > 0; width--) - putch(' ', putdat); - break; - - // (signed) decimal - case 'd': - num = getint(&ap, lflag); - if ((long long) num < 0) { - putch('-', putdat); - num = -(long long) num; - } - base = 10; - goto signed_number; - - // unsigned decimal - case 'u': - base = 10; - goto unsigned_number; - - // (unsigned) octal - case 'o': - // should do something with padding so it's always 3 octits - base = 8; - goto unsigned_number; - - // pointer - case 'p': - static_assert(sizeof(long) == sizeof(void*)); - lflag = 1; - putch('0', putdat); - putch('x', putdat); - /* fall through to 'x' */ - - // (unsigned) hexadecimal - case 'X': - case 'x': - base = 16; - unsigned_number: - num = getuint(&ap, lflag); - signed_number: - printnum(putch, putdat, num, base, width, padc); - break; - - // escaped '%' character - case '%': - putch(ch, putdat); - break; - - // unrecognized escape sequence - just print it literally - default: - putch('%', putdat); - fmt = last_fmt; - break; - } - } -} - -int printf(const char* fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - - vprintfmt((void*)putchar, 0, fmt, ap); - - va_end(ap); - return 0; // incorrect return value, but who cares, anyway? -} - -int puts(const char* s) -{ - printf(s); - printf("\n"); - return 0; // incorrect return value, but who cares, anyway? 
-} - -int sprintf(char* str, const char* fmt, ...) -{ - va_list ap; - char* str0 = str; - va_start(ap, fmt); - - void sprintf_putch(int ch, void** data) - { - char** pstr = (char**)data; - **pstr = ch; - (*pstr)++; - } - - vprintfmt(sprintf_putch, (void**)&str, fmt, ap); - *str = 0; - - va_end(ap); - return str - str0; -} - -void* memcpy(void* dest, const void* src, size_t len) -{ - if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) { - const uintptr_t* s = src; - uintptr_t *d = dest; - while (d < (uintptr_t*)(dest + len)) - *d++ = *s++; - } else { - const char* s = src; - char *d = dest; - while (d < (char*)(dest + len)) - *d++ = *s++; - } - return dest; -} -void* memset(void* dest, int byte, size_t len) -{ - if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) { - uintptr_t word = byte & 0xFF; - word |= word << 8; - word |= word << 16; - word |= word << 16 << 16; - - uintptr_t *d = dest; - while (d < (uintptr_t*)(dest + len)){ - *d = word; - d++;} - } else { - char *d = dest; - while (d < (char*)(dest + len)){ - *d = byte; - d++;} - } - return dest; -} - -size_t strlen(const char *s) -{ - const char *p = s; - while (*p) - p++; - return p - s; -} - -size_t strnlen(const char *s, size_t n) -{ - const char *p = s; - while (n-- && *p) - p++; - return p - s; -} - -int strcmp(const char* s1, const char* s2) -{ - unsigned char c1, c2; - - do { - c1 = *s1++; - c2 = *s2++; - } while (c1 != 0 && c1 == c2); - - return c1 - c2; -} - -char* strcpy(char* dest, const char* src) -{ - char* d = dest; - while ((*d++ = *src++)) - ; - return dest; -} - -long atol(const char* str) -{ - long res = 0; - int sign = 0; - - while (*str == ' ') - str++; - - if (*str == '-' || *str == '+') { - sign = *str == '-'; - str++; - } - - while (*str) { - res *= 10; - res += *str++ - '0'; - } - - return sign ? 
-res : res; -} diff --git a/benchmarks/riscv-coremark/riscv64-baremetal/util.h b/benchmarks/riscv-coremark/riscv64-baremetal/util.h deleted file mode 100644 index 081cfd634..000000000 --- a/benchmarks/riscv-coremark/riscv64-baremetal/util.h +++ /dev/null @@ -1,90 +0,0 @@ -// See LICENSE for license details. - -#ifndef __UTIL_H -#define __UTIL_H - -extern void setStats(int enable); - -#include - -#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } - -static int verify(int n, const volatile int* test, const int* verify) -{ - int i; - // Unrolled for faster verification - for (i = 0; i < n/2*2; i+=2) - { - int t0 = test[i], t1 = test[i+1]; - int v0 = verify[i], v1 = verify[i+1]; - if (t0 != v0) return i+1; - if (t1 != v1) return i+2; - } - if (n % 2 != 0 && test[n-1] != verify[n-1]) - return n; - return 0; -} - -static int verifyDouble(int n, const volatile double* test, const double* verify) -{ - int i; - // Unrolled for faster verification - for (i = 0; i < n/2*2; i+=2) - { - double t0 = test[i], t1 = test[i+1]; - double v0 = verify[i], v1 = verify[i+1]; - int eq1 = t0 == v0, eq2 = t1 == v1; - if (!(eq1 & eq2)) return i+1+eq1; - } - if (n % 2 != 0 && test[n-1] != verify[n-1]) - return n; - return 0; -} - -static void __attribute__((noinline)) barrier(int ncores) -{ - static volatile int sense; - static volatile int count; - static __thread int threadsense; - - __sync_synchronize(); - - threadsense = !threadsense; - if (__sync_fetch_and_add(&count, 1) == ncores-1) - { - count = 0; - sense = threadsense; - } - else while(sense != threadsense) - ; - - __sync_synchronize(); -} - -static uint64_t lfsr(uint64_t x) -{ - uint64_t bit = (x ^ (x >> 1)) & 1; - return (x >> 1) | (bit << 62); -} - -static uintptr_t insn_len(uintptr_t pc) -{ - return (*(unsigned short*)pc & 3) ? 
4 : 2; -} - -#ifdef __riscv -#include "encoding.h" -#endif - -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code, iter) do { \ - unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ - code; \ - _c += read_csr(mcycle), _i += read_csr(minstret); \ - if (cid == 0) \ - printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ - } while(0) - -#endif //__UTIL_H diff --git a/examples/fp/fpcalc/fpcalc.c b/examples/fp/fpcalc/fpcalc.c index f41494c70..94bfc9ac1 100644 --- a/examples/fp/fpcalc/fpcalc.c +++ b/examples/fp/fpcalc/fpcalc.c @@ -157,7 +157,7 @@ void softfloatInit(void) { // RM: softfloat_round_min softfloat_roundingMode = softfloat_round_near_even; softfloat_exceptionFlags = 0; // clear exceptions - softfloat_detectTininess = softfloat_tininess_beforeRounding; // RISC-V behavior for tininess + softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess } uint64_t parseNum(char *num) { diff --git a/examples/fp/softfloat_demo/softfloat_demo.c b/examples/fp/softfloat_demo/softfloat_demo.c index 918682ba2..0f7ba6f00 100644 --- a/examples/fp/softfloat_demo/softfloat_demo.c +++ b/examples/fp/softfloat_demo/softfloat_demo.c @@ -37,7 +37,7 @@ void softfloatInit(void) { // RM: softfloat_round_min softfloat_roundingMode = softfloat_round_near_even; softfloat_exceptionFlags = 0; // clear exceptions - softfloat_detectTininess = softfloat_tininess_beforeRounding; // RISC-V behavior for tininess + softfloat_detectTininess = softfloat_tininess_afterRounding; // RISC-V behavior for tininess } int main() diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index cc8d1b2b8..8f13b2e36 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 +`define XLEN 64 
// IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 5db8af1cf..015ef2611 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -95,11 +95,27 @@ // largest length in IEU/FPU `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) -`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) + +// division constants +`define RADIX 32'h2 +`define DIVCOPIES 32'h1 +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) +`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) +`define LOGR ((`RADIX==2) ? 
32'h1 : 32'h2) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +// one interation is required for the integer bit for minimally redundent radix-4 +`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) +`define DURLEN ($clog2(`FPDUR+1)) +`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) + + +`define USE_SRAM 0 // Disable spurious Verilator warnings diff --git a/pipelined/regression/sim-buildroot-batch b/pipelined/regression/sim-buildroot-batch index 4a145bc99..a8d4038ef 100755 --- a/pipelined/regression/sim-buildroot-batch +++ b/pipelined/regression/sim-buildroot-batch @@ -32,5 +32,5 @@ echo "CHECKPOINT = ${CHECKPOINT}" # *** change config from buildroot to rv64gc vsim -c <`XLEN) - mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData)); - else - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); - mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), - .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), - .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), + .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), + .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), .s({SelFlush, SelEvict}), .y(CacheBusAdr)); ///////////////////////////////////////////////////////////////////////////////////////////// @@ -185,7 +179,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER flopenl #(NUMWAYS) FlushWayReg(.clk, .load(ResetOrFlushWay), .en(FlushWayCntEn), .val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); assign FlushWayFlag = FlushWay[NUMWAYS-1]; - assign 
NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + if(NUMWAYS > 1) assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + else assign NextFlushWay = FlushWay[NUMWAYS-1]; ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables diff --git a/pipelined/src/cache/cachereplacementpolicy.sv b/pipelined/src/cache/cachereplacementpolicy.sv index cb33480dc..d407bc284 100644 --- a/pipelined/src/cache/cachereplacementpolicy.sv +++ b/pipelined/src/cache/cachereplacementpolicy.sv @@ -59,8 +59,8 @@ module cachereplacementpolicy // Replacement Bits: Register file // Needs to be resettable for simulation, but could omit reset for synthesis *** always_ff @(posedge clk) - if (reset) for (int set = 0; set < NUMLINES; set++) ReplacementBits[set] = '0; - else if (LRUWriteEnD) ReplacementBits[RAdrD] = NewReplacementD; + if (reset) for (int set = 0; set < NUMLINES; set++) ReplacementBits[set] <= '0; + else if (LRUWriteEnD) ReplacementBits[RAdrD] <= NewReplacementD; assign LineReplacementBits = ReplacementBits[RAdrD]; genvar index; diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index ac1e26e8f..192fb9ace 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, - input logic FLoad2, + input logic FStore2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); - assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0}; 
+ assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; end else onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); @@ -105,7 +105,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, // Data Array ///////////////////////////////////////////////////////////////////////////////////////////// - // *** Potential optimization: if byte write enables are available, could remove subwordwrites + // *** instantiate one larger RAM, not one per RAM. Expand byte mask genvar words; for(words = 0; words < LINELEN/`XLEN; words++) begin: word sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr), diff --git a/pipelined/src/cache/sram1p1rw.sv b/pipelined/src/cache/sram1p1rw.sv index ac16ae9bf..49bf5d852 100644 --- a/pipelined/src/cache/sram1p1rw.sv +++ b/pipelined/src/cache/sram1p1rw.sv @@ -33,6 +33,8 @@ // WIDTH is number of bits in one "word" of the memory, DEPTH is number of such words +`include "wally-config.vh" + module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( input logic clk, input logic [$clog2(DEPTH)-1:0] Adr, @@ -47,37 +49,34 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) ( always_ff @(posedge clk) AdrD <= Adr; genvar index; -/* -----\/----- EXCLUDED -----\/----- - for(index = 0; index < WIDTH/8; index++) begin - always_ff @(posedge clk) begin - if (WriteEnable & ByteMask[index]) begin - StoredData[Adr][8*(index+1)-1:8*index] <= #1 CacheWriteData[8*(index+1)-1:8*index]; - end - end - end - -----/\----- EXCLUDED -----/\----- */ - if (WIDTH%8 != 0) // handle msbs if not a multiple of 8 - always_ff @(posedge clk) - if (WriteEnable & ByteMask[WIDTH/8]) - StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 - CacheWriteData[WIDTH-1:WIDTH-WIDTH%8]; - - for(index = 0; index < WIDTH/8; index++) - always_ff @(posedge clk) - if(WriteEnable & ByteMask[index]) - StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 
+: 8]; -/* - // if not a multiple of 8, MSByte is not 8 bits long. - if(WIDTH%8 != 0) begin - always_ff @(posedge clk) begin - if (WriteEnable & ByteMask[WIDTH/8]) begin - StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 CacheWriteData[WIDTH-1:WIDTH-WIDTH%8]; - end - end - end -*/ - assign ReadData = StoredData[AdrD]; + + if (`USE_SRAM == 1) begin + // 64 x 128-bit SRAM + // check if the size is ok, complain if not*** + logic [WIDTH-1:0] BitWriteMask; + for (index=0; index < WIDTH; index++) + assign BitWriteMask[index] = ByteMask[index/8]; + TS1N28HPCPSVTB64X128M4SWBASO sram( + .CLK(clk), .CEB(1'b0), .WEB(~WriteEnable), + .A(Adr), .D(CacheWriteData), + .BWEB(~BitWriteMask), .Q(ReadData) + ); + + end else begin + if (WIDTH%8 != 0) // handle msbs if not a multiple of 8 + always_ff @(posedge clk) + if (WriteEnable & ByteMask[WIDTH/8]) + StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 + CacheWriteData[WIDTH-1:WIDTH-WIDTH%8]; + + for(index = 0; index < WIDTH/8; index++) + always_ff @(posedge clk) + if(WriteEnable & ByteMask[index]) + StoredData[Adr][index*8 +: 8] <= #1 CacheWriteData[index*8 +: 8]; + + assign ReadData = StoredData[AdrD]; + end endmodule diff --git a/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v b/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v new file mode 120000 index 000000000..c81975209 --- /dev/null +++ b/pipelined/src/cache/ts1n28hpcpsvtb64x128m4swbaso_180a_tt1v25c.v @@ -0,0 +1 @@ +/proj/wally/memory/ts1n28hpcpsvtb64x128m4sw_180a/VERILOG/ts1n28hpcpsvtb64x128m4sw_180a_tt1v25c.v \ No newline at end of file diff --git a/pipelined/src/ebu/ahblite.sv b/pipelined/src/ebu/ahblite.sv index 41305090d..a502bf9f4 100644 --- a/pipelined/src/ebu/ahblite.sv +++ b/pipelined/src/ebu/ahblite.sv @@ -49,6 +49,8 @@ module ahblite ( input logic [2:0] IFUBurstType, input logic [1:0] IFUTransType, input logic IFUTransComplete, + input logic [(`XLEN-1)/8:0] ByteMaskM, + // Signals from Data Cache input logic [`PA_BITS-1:0] LSUBusAdr, input 
logic LSUBusRead, @@ -67,6 +69,7 @@ module ahblite ( (* mark_debug = "true" *) output logic HCLK, HRESETn, (* mark_debug = "true" *) output logic [31:0] HADDR, // *** one day switch to a different bus that supports the full physical address (* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, + output logic [`XLEN/8-1:0] HWSTRB, (* mark_debug = "true" *) output logic HWRITE, (* mark_debug = "true" *) output logic [2:0] HSIZE, (* mark_debug = "true" *) output logic [2:0] HBURST, @@ -131,8 +134,8 @@ module ahblite ( // bus outputs assign #1 GrantData = (NextBusState == MEMREAD) | (NextBusState == MEMWRITE); - assign #1 AccessAddress = (GrantData) ? LSUBusAdr[31:0] : IFUBusAdr[31:0]; - assign #1 HADDR = AccessAddress; + assign AccessAddress = (GrantData) ? LSUBusAdr[31:0] : IFUBusAdr[31:0]; + assign HADDR = AccessAddress; assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize; assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst. @@ -154,6 +157,10 @@ module ahblite ( assign HTRANS = (GrantData) ? 
LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise assign HMASTLOCK = 0; // no locking supported assign HWRITE = (NextBusState == MEMWRITE); + //assign HWSTRB = ByteMaskM; + // Byte mask for HWSTRB + swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(HWSTRB)); + // delay write data by one cycle for flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN // delay signals for subword writes @@ -163,13 +170,10 @@ module ahblite ( // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - - assign IFUBusHRDATA = HRDATA; assign LSUBusHRDATA = HRDATA; assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD); assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE)); assign IFUBusAck = HREADY & (BusState == INSTRREAD); assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE)); - endmodule diff --git a/pipelined/src/fpu/convert_inputs.sv b/pipelined/src/fpu/convert_inputs.sv deleted file mode 100755 index aec1aa017..000000000 --- a/pipelined/src/fpu/convert_inputs.sv +++ /dev/null @@ -1,74 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Floating point divider/square root top unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module convert_inputs( - input [63:0] op1, // 1st input operand (A) - input [63:0] op2, // 2nd input operand (B) - input [2:0] op_type, // Function opcode - input P, // Result Precision (0 for double, 1 for single) - - output [63:0] Float1, // Converted 1st input operand - output [63:0] Float2 // Converted 2nd input operand -); - - wire conv_SP; // Convert from SP to DP - wire Zexp1; // One if the exponent of op1 is zero - wire Zexp2; // One if the exponent of op2 is zero - wire Oexp1; // One if the exponent of op1 is all ones - wire Oexp2; // One if the exponent of op2 is all ones - - // Convert from single precision to double precision if (op_type is 11X - // and P is 0) or (op_type is not 11X and P is one). 
- assign conv_SP = ~P; - - // Test if the input exponent is zero, because if it is then the - // exponent of the converted number should be zero. - assign Zexp1 = ~(|op1[30:23]); - assign Zexp2 = ~(|op2[30:23]); - assign Oexp1 = (&op1[30:23]); - assign Oexp2 = (&op2[30:23]); - - // Conditionally convert op1. Lower 29 bits are zero for single precision. - assign Float1[62:29] = conv_SP ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]} - : op1[62:29]; - assign Float1[28:0] = op1[28:0] & {29{~conv_SP}}; - - // Conditionally convert op2. Lower 29 bits are zero for single precision. - assign Float2[62:29] = conv_SP ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]} - : op2[62:29]; - assign Float2[28:0] = op2[28:0] & {29{~conv_SP}}; - - // Set the sign of Float1 based on its original sign and if the operation - // is negation (op_type = 101) or absolute value (op_type = 100) - - assign Float1[63] = conv_SP ? op1[31] : op1[63]; - assign Float2[63] = conv_SP ? op2[31] : op2[63]; - -endmodule // convert_inputs - diff --git a/pipelined/src/fpu/convert_inputs_div.sv b/pipelined/src/fpu/convert_inputs_div.sv deleted file mode 100755 index 9d6d75c22..000000000 --- a/pipelined/src/fpu/convert_inputs_div.sv +++ /dev/null @@ -1,47 +0,0 @@ -// This module takes as inputs two operands (op1 and op2) -// and the result precision (P). Based on the operation and precision, -// it conditionally converts single precision values to double -// precision values and modifies the sign of op1. -// The converted operands are Float1 and Float2. 
-module convert_inputs_div ( - - input logic [63:0] op1, // 1st input operand (A) - input logic [63:0] op2, // 2nd input operand (B) - input logic P, // Result Precision (0 for double, 1 for single) - input logic op_type, // Operation - - output logic [63:0] Float1, // Converted 1st input operand - output logic [63:0] Float2b // Converted 2nd input operand -); - - logic [63:0] Float2; - logic Zexp1; // One if the exponent of op1 is zero - logic Zexp2; // One if the exponent of op2 is zero - logic Oexp1; // One if the exponent of op1 is all ones - logic Oexp2; // One if the exponent of op2 is all ones - - // Test if the input exponent is zero, because if it is then the - // exponent of the converted number should be zero. - assign Zexp1 = ~(|op1[30:23]); - assign Zexp2 = ~(|op2[30:23]); - assign Oexp1 = (&op1[30:23]); - assign Oexp2 = (&op2[30:23]); - - // Conditionally convert op1. Lower 29 bits are zero for single precision. - assign Float1[62:29] = P ? {op1[30], {3{(~op1[30]&~Zexp1)|Oexp1}}, op1[29:0]} - : op1[62:29]; - assign Float1[28:0] = op1[28:0] & {29{~P}}; - - // Conditionally convert op2. Lower 29 bits are zero for single precision. - assign Float2[62:29] = P ? {op2[30], {3{(~op2[30]&~Zexp2)|Oexp2}}, op2[29:0]} - : op2[62:29]; - assign Float2[28:0] = op2[28:0] & {29{~P}}; - - // Set the sign of Float1 based on its original sign - assign Float1[63] = P ? op1[31] : op1[63]; - assign Float2[63] = P ? op2[31] : op2[63]; - - // For sqrt, assign Float2 same as Float1 for simplicity - assign Float2b = op_type ? 
Float1 : Float2; - -endmodule // convert_inputs \ No newline at end of file diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index ab054342f..4d346d970 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -1,14 +1,42 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Conversion shift calculation +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module cvtshiftcalc( - input logic XZeroM, + input logic XZero, input logic ToInt, input logic IntToFp, - input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic [`NF:0] XManM, // input mantissas + input logic [`NE:0] CvtCe, // the calculated expoent + input logic [`NF:0] Xm, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) - input logic CvtResDenormUfM, + input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) + input logic CvtResDenormUf, output logic CvtResUf, output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); @@ -31,9 +59,10 @@ module cvtshiftcalc( // | `NF-1 zeros | Mantissa | 0's if nessisary | // - otherwise: // | LzcInM | 0's if nessisary | - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : - CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : - {CvtLzcInM, {`NF+1{1'b0}}}; + // change to int shift to the left one + assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : + CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : + {CvtLzcIn, {`NF+1{1'b0}}}; // choose the negative of the fraction size @@ -64,6 +93,6 @@ module cvtshiftcalc( // determine if the result underflows ??? 
-> fp // - if the first 1 is shifted out of the result then the result underflows // - can't underflow an integer to fp conversions - assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp; + assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/divconv.sv b/pipelined/src/fpu/divconv.sv deleted file mode 100755 index 8cc98cd01..000000000 --- a/pipelined/src/fpu/divconv.sv +++ /dev/null @@ -1,126 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 9/28/2021 -// -// Purpose: Main convergence routine for floating point divider/square root unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module divconv ( - input logic [52:0] d, n, - input logic [2:0] sel_muxa, sel_muxb, - input logic sel_muxr, - input logic load_rega, load_regb, load_regc, load_regd, - input logic load_regr, load_regs, - input logic P, - input logic op_type, - input logic exp_odd, - input logic reset, - input logic clk, - - output logic [59:0] q1, qp1, qm1, - output logic [59:0] q0, qp0, qm0, - output logic [59:0] rega_out, regb_out, regc_out, regd_out, - output logic [119:0] regr_out -); - - logic [59:0] muxa_out, muxb_out; - logic [10:0] ia_div, ia_sqrt; - logic [59:0] ia_out; - logic [119:0] mul_out; - logic [59:0] q_out1, qm_out1, qp_out1; - logic [59:0] q_out0, qm_out0, qp_out0; - logic [59:0] mcand, mplier, mcand_q; - logic [59:0] twocmp_out; - logic [60:0] three; - logic [119:0] constant, constant2; - logic [59:0] q_const, qp_const, qm_const; - logic [59:0] d2, n2; - logic muxr_out; - logic cout1, cout2, cout3, cout4, cout5, cout6, cout7; - - // Check if exponent is odd for sqrt - // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA - assign d2 = (exp_odd&op_type) ? {1'b0, d, 6'h0} : {d, 7'h0}; - assign n2 = op_type ? d2 : {n, 7'h0}; - - // IA div/sqrt - sbtm_div ia1 (d[52:41], ia_div); - sbtm_sqrt ia2 (d2[59:48], ia_sqrt); - assign ia_out = op_type ? 
{ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}}; - - // Choose IA or iteration - mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out); - mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out); - - // Deal with remainder if [0.5, 1) instead of [1, 2) - mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2); - // Select Mcand, Remainder/Q'' - mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant); - // Select mcand - remainder should always choose q1 [1,2) because - // adjustment of N in the from XX.FFFFFFF - mux2 #(60) mx4 (q0, q1, q1[59], mcand_q); - mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier); - mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand); - // Q*D - N (reversed but changed in rounder.v to account for sign reversal) - // Add ulp for subtraction in remainder - mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out); - - // Constant for Q'' - mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const); - mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const); - mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const); - - // CPA (from CSA)/Remainder addition/subtraction - assign {cout1, mul_out} = (mcand*mplier) + constant + {119'b0, muxr_out}; - - // Assuming [1,2) - q1 - assign {cout2, q_out1} = regb_out + q_const; - assign {cout3, qp_out1} = regb_out + qp_const; - assign {cout4, qm_out1} = regb_out + qm_const + 1'b1; - // Assuming [0.5,1) - q0 - assign {cout5, q_out0} = {regb_out[58:0], 1'b0} + q_const; - assign {cout6, qp_out0} = {regb_out[58:0], 1'b0} + qp_const; - assign {cout7, qm_out0} = {regb_out[58:0], 1'b0} + qm_const + 1'b1; - - // One's complement instead of two's complement (for hw efficiency) - assign three = {~mul_out[118], mul_out[118], ~mul_out[117:59]}; - mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type, twocmp_out); - - // regs - flopenr #(60) regc (clk, reset, load_regc, 
twocmp_out, regc_out); - flopenr #(60) regb (clk, reset, load_regb, mul_out[118:59], regb_out); - flopenr #(60) rega (clk, reset, load_rega, mul_out[118:59], rega_out); - flopenr #(60) regd (clk, reset, load_regd, mul_out[118:59], regd_out); - flopenr #(120) regr (clk, reset, load_regr, mul_out, regr_out); - // Assuming [1,2) - flopenr #(60) rege (clk, reset, load_regs, {q_out1[59:35], (q_out1[34:6] & {29{~P}}), 6'h0}, q1); - flopenr #(60) regf (clk, reset, load_regs, {qm_out1[59:35], (qm_out1[34:6] & {29{~P}}), 6'h0}, qm1); - flopenr #(60) regg (clk, reset, load_regs, {qp_out1[59:35], (qp_out1[34:6] & {29{~P}}), 6'h0}, qp1); - // Assuming [0,1) - flopenr #(60) regh (clk, reset, load_regs, {q_out0[59:35], (q_out0[34:6] & {29{~P}}), 6'h0}, q0); - flopenr #(60) regj (clk, reset, load_regs, {qm_out0[59:35], (qm_out0[34:6] & {29{~P}}), 6'h0}, qm0); - flopenr #(60) regk (clk, reset, load_regs, {qp_out0[59:35], (qp_out0[34:6] & {29{~P}}), 6'h0}, qp0); - -endmodule // divconv diff --git a/pipelined/src/fpu/divconv_pipe.sv b/pipelined/src/fpu/divconv_pipe.sv deleted file mode 100755 index 3562ad3d5..000000000 --- a/pipelined/src/fpu/divconv_pipe.sv +++ /dev/null @@ -1,198 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Convergence unit for pipelined floating point divider/square root top unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, - regr_out, d, n, sel_muxa, sel_muxb, sel_muxr, reset, clk, - load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, load_regp, - P, op_type, exp_odd); - - input logic [52:0] d, n; - input logic [2:0] sel_muxa, sel_muxb; - input logic sel_muxr; - input logic load_rega, load_regb, load_regc, load_regd; - input logic load_regr, load_regs; - input logic load_regp; - input logic P; - input logic op_type; - input logic exp_odd; - input logic reset; - input logic clk; - - output logic [59:0] q1, qp1, qm1; - output logic [59:0] q0, qp0, qm0; - output logic [59:0] rega_out, regb_out, regc_out, regd_out; - output logic [119:0] regr_out; - - supply1 vdd; - supply0 vss; - - logic [59:0] muxa_out, muxb_out; - logic muxr_out; - logic [10:0] ia_div, ia_sqrt; - logic [59:0] ia_out; - logic [119:0] mul_out; - logic [59:0] q_out1, qm_out1, qp_out1; - logic [59:0] q_out0, qm_out0, qp_out0; - logic [59:0] mcand, mplier, mcand_q; - logic [59:0] twocmp_out; - logic [60:0] three; - logic [119:0] Carry, Carry2; - logic [119:0] Sum, Sum2; - logic [119:0] constant, constant2; - logic [59:0] q_const, qp_const, qm_const; - logic [59:0] d2, n2; - logic [11:0] d3; - - // Check if exponent is odd for sqrt - // If exp_odd=1 and sqrt, then M/2 and use ia_addr=0 as IA - assign d2 = (exp_odd&op_type) ? {vss, d, 6'h0} : {d, 7'h0}; - assign n2 = op_type ? d2 : {n, 7'h0}; - - // IA div/sqrt - sbtm_div ia1 (d[52:41], ia_div); - sbtm_sqrt ia2 (d2[59:48], ia_sqrt); - assign ia_out = op_type ? 
{ia_sqrt, {49{1'b0}}} : {ia_div, {49{1'b0}}}; - - // Choose IA or iteration - mux6 #(60) mx1 (d2, ia_out, rega_out, regc_out, regd_out, regb_out, sel_muxb, muxb_out); - mux5 #(60) mx2 (regc_out, n2, ia_out, regb_out, regd_out, sel_muxa, muxa_out); - - // Deal with remainder if [0.5, 1) instead of [1, 2) - mux2 #(120) mx3a ({~n, {67{1'b1}}}, {{1'b1}, ~n, {66{1'b1}}}, q1[59], constant2); - // Select Mcand, Remainder/Q'' - mux2 #(120) mx3 (120'h0, constant2, sel_muxr, constant); - // Select mcand - remainder should always choose q1 [1,2) because - // adjustment of N in the from XX.FFFFFFF - mux2 #(60) mx4 (q0, q1, q1[59], mcand_q); - mux2 #(60) mx5 (muxb_out, mcand_q, sel_muxr&op_type, mplier); - mux2 #(60) mx6 (muxa_out, mcand_q, sel_muxr, mcand); - // R4 Booth TDM multiplier (carry/save) - redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry)); - // Q*D - N (reversed but changed in rounder.v to account for sign reversal) - csa #(120) csa1 (Sum, Carry, constant, Sum2, Carry2); - // Add ulp for subtraction in remainder - mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out); - - // Constant for Q'' - mux2 #(60) mx8 ({60'h0000_0000_0000_020}, {60'h0000_0040_0000_000}, P, q_const); - mux2 #(60) mx9 ({60'h0000_0000_0000_0A0}, {60'h0000_0140_0000_000}, P, qp_const); - mux2 #(60) mxA ({60'hFFFF_FFFF_FFFF_F9F}, {60'hFFFF_FF3F_FFFF_FFF}, P, qm_const); - - logic [119:0] Sum_pipe; - logic [119:0] Carry_pipe; - logic muxr_pipe; - logic rega_pipe; - logic regb_pipe; - logic regc_pipe; - logic regd_pipe; - logic regs_pipe; - logic regs_pipe2; - logic regr_pipe; - logic P_pipe; - logic op_type_pipe; - logic [59:0] q_const_pipe; - logic [59:0] qm_const_pipe; - logic [59:0] qp_const_pipe; - logic [59:0] q_const_pipe2; - logic [59:0] qm_const_pipe2; - logic [59:0] qp_const_pipe2; - - // Stage 1 - flopenr #(120) regp1 (clk, reset, load_regp, Sum2, Sum_pipe); - flopenr #(120) regp2 (clk, reset, load_regp, Carry2, Carry_pipe); - flopenr #(1) regp3 (clk, reset, load_regp, 
muxr_out, muxr_pipe); - - flopenr #(1) regp4 (clk, reset, load_regp, load_rega, rega_pipe); - flopenr #(1) regp5 (clk, reset, load_regp, load_regb, regb_pipe); - flopenr #(1) regp6 (clk, reset, load_regp, load_regc, regc_pipe); - flopenr #(1) regp7 (clk, reset, load_regp, load_regd, regd_pipe); - flopenr #(1) regp8 (clk, reset, load_regp, load_regs, regs_pipe); - flopenr #(1) regp9 (clk, reset, load_regp, load_regr, regr_pipe); - flopenr #(1) regpA (clk, reset, load_regp, P, P_pipe); - flopenr #(1) regpB (clk, reset, load_regp, op_type, op_type_pipe); - flopenr #(60) regpC (clk, reset, load_regp, q_const, q_const_pipe); - flopenr #(60) regpD (clk, reset, load_regp, qp_const, qp_const_pipe); - flopenr #(60) regpE (clk, reset, load_regp, qm_const, qm_const_pipe); - - // CPA (from CSA)/Remainder addition/subtraction - assign mul_out = Sum_pipe + Carry_pipe + {119'h0, muxr_pipe}; - // One's complement instead of two's complement (for hw efficiency) - assign three = {~mul_out[118] , mul_out[118], ~mul_out[117:59]}; - mux2 #(60) mxTC (~mul_out[118:59], three[60:1], op_type_pipe, twocmp_out); - - // Stage 2 - flopenr #(60) regc (clk, reset, regc_pipe, twocmp_out, regc_out); - flopenr #(60) regb (clk, reset, regb_pipe, mul_out[118:59], regb_out); - flopenr #(60) rega (clk, reset, rega_pipe, mul_out[118:59], rega_out); - flopenr #(60) regd (clk, reset, regd_pipe, mul_out[118:59], regd_out); - flopenr #(120) regr (clk, reset, regr_pipe, mul_out, regr_out); - flopenr #(1) regl (clk, reset, regs_pipe, regs_pipe, regs_pipe2); - flopenr #(60) regm (clk, reset, regs_pipe, q_const_pipe, q_const_pipe2); - flopenr #(60) regn (clk, reset, regs_pipe, qp_const_pipe, qp_const_pipe2); - flopenr #(60) rego (clk, reset, regs_pipe, qm_const_pipe, qm_const_pipe2); - - // Assuming [1,2) - q1 - assign q_out1 = regb_out + q_const; - assign qp_out1 = regb_out + qp_const; - assign qm_out1 = regb_out + qm_const + 1'b1; - // Assuming [0.5,1) - q0 - assign q_out0 = {regb_out[58:0], 1'b0} + q_const; 
- assign qp_out0 = {regb_out[58:0], 1'b0} + qp_const; - assign qm_out0 = {regb_out[58:0], 1'b0} + qm_const + 1'b1; - - // Stage 3 - // Assuming [1,2) - flopenr #(60) rege (clk, reset, regs_pipe2, {q_out1[59:35], (q_out1[34:6] & {29{~P_pipe}}), 6'h0}, q1); - flopenr #(60) regf (clk, reset, regs_pipe2, {qm_out1[59:35], (qm_out1[34:6] & {29{~P_pipe}}), 6'h0}, qm1); - flopenr #(60) regg (clk, reset, regs_pipe2, {qp_out1[59:35], (qp_out1[34:6] & {29{~P_pipe}}), 6'h0}, qp1); - // Assuming [0,1) - flopenr #(60) regh (clk, reset, regs_pipe2, {q_out0[59:35], (q_out0[34:6] & {29{~P_pipe}}), 6'h0}, q0); - flopenr #(60) regj (clk, reset, regs_pipe2, {qm_out0[59:35], (qm_out0[34:6] & {29{~P_pipe}}), 6'h0}, qm0); - flopenr #(60) regk (clk, reset, regs_pipe2, {qp_out0[59:35], (qp_out0[34:6] & {29{~P_pipe}}), 6'h0}, qp0); - -endmodule // divconv - -// *** rewrote behaviorally dh 5 Jan 2021 for speed -module csa #(parameter WIDTH=8) ( - input logic [WIDTH-1:0] a, b, c, - output logic [WIDTH-1:0] sum, carry); - - assign sum = a ^ b ^ c; - assign carry = (a & (b | c)) | (b & c); -/* - logic [WIDTH:0] carry_temp; - genvar i; - for (i=0;i> NF+1 Exp = DivCalcExp+NF+1 - // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivCalcExp+NF+1-1 - assign DivDenormShift = Nf+DivCalcExpM; + // Left shift amount = DivQe+NF+1-1 + assign DivDenormShift = (`NE+2)'(`NF)+DivQe; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 - // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // 00000000.xxxxxxx... 
<< NF Exp = DivQe+1 + // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) // inital Left shift amount = NF - assign NormShift = Nf; + // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit + assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - // *** may be able to reduce shifter size - assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv new file mode 100644 index 000000000..cbf7f95f0 --- /dev/null +++ b/pipelined/src/fpu/divsqrt.sv @@ -0,0 +1,69 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module divsqrt( + input logic clk, + input logic reset, + input logic [`FMTBITS-1:0] FmtE, + input logic [`NF:0] XManE, YManE, + input logic [`NE-1:0] XExpE, YExpE, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic DivStartE, + input logic StallM, + input logic StallE, + output logic DivStickyM, + output logic DivBusy, + output logic DivDone, + output logic [`NE+1:0] DivCalcExpM, + output logic [`DURLEN-1:0] EarlyTermShiftM, + output logic [`QLEN-1-(`RADIX/4):0] QuotM +// output logic [`XLEN-1:0] RemM, +); + + logic [`DIVLEN+3:0] NextWSN, NextWCN; + logic [`DIVLEN+3:0] WS, WC; + logic [`DIVLEN+3:0] StickyWSA; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`DIVLEN-1:0] X; + logic [`DIVLEN-1:0] Dpreproc; + logic [`DURLEN-1:0] Dur; + logic NegSticky; + + srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/exception_div.sv b/pipelined/src/fpu/exception_div.sv deleted file mode 100755 index 3e701d2fb..000000000 --- a/pipelined/src/fpu/exception_div.sv +++ /dev/null @@ -1,82 +0,0 @@ -// Exception logic for the floating point adder. Note: We may -// actually want to move to where the result is computed. 
-module exception_div ( - - input logic [63:0] A, // 1st input operand (op1) - input logic [63:0] B, // 2nd input operand (op2) - input logic op_type, // Determine operation - output logic [2:0] Ztype, // Indicates type of result (Z) - output logic Invalid // Invalid operation exception -); - - logic AzeroM; // '1' if the mantissa of A is zero - logic BzeroM; // '1' if the mantissa of B is zero - logic AzeroE; // '1' if the exponent of A is zero - logic BzeroE; // '1' if the exponent of B is zero - logic AonesE; // '1' if the exponent of A is all ones - logic BonesE; // '1' if the exponent of B is all ones - logic AInf; // '1' if A is infinite - logic BInf; // '1' if B is infinite - logic AZero; // '1' if A is 0 - logic BZero; // '1' if B is 0 - logic ANaN; // '1' if A is a not-a-number - logic BNaN; // '1' if B is a not-a-number - logic ASNaN; // '1' if A is a signalling not-a-number - logic BSNaN; // '1' if B is a signalling not-a-number - logic ZSNaN; // '1' if result Z is a quiet NaN - logic ZInf; // '1' if result Z is an infnity - logic Zero; // '1' if result is zero - logic NegSqrt; // '1' if sqrt and operand is negative - - //***take this module out and add more registers or just recalculate it all - // Determine if mantissas are all zeros - assign AzeroM = (A[51:0] == 52'h0); - assign BzeroM = (B[51:0] == 52'h0); - - // Determine if exponents are all ones or all zeros - assign AonesE = A[62]&A[61]&A[60]&A[59]&A[58]&A[57]&A[56]&A[55]&A[54]&A[53]&A[52]; - assign BonesE = B[62]&B[61]&B[60]&B[59]&B[58]&B[57]&B[56]&B[55]&B[54]&B[53]&B[52]; - assign AzeroE = ~(A[62]|A[61]|A[60]|A[59]|A[58]|A[57]|A[56]|A[55]|A[54]|A[53]|A[52]); - assign BzeroE = ~(B[62]|B[61]|B[60]|B[59]|B[58]|B[57]|B[56]|B[55]|B[54]|B[53]|B[52]); - - // Determine special cases. Note: Zero is not really a special case. 
- assign AInf = AonesE & AzeroM; - assign BInf = BonesE & BzeroM; - assign ANaN = AonesE & ~AzeroM; - assign BNaN = BonesE & ~BzeroM; - assign ASNaN = ANaN & A[50]; - assign BSNaN = ANaN & A[50]; - assign AZero = AzeroE & AzeroM; - assign BZero = BzeroE & BzeroE; - - // Is NaN if operand is negative and its a sqrt - assign NegSqrt = (A[63] & op_type & ~AZero); - - // An "Invalid Operation" exception occurs if (A or B is a signalling NaN) - // or (A and B are both Infinite) - assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | - NegSqrt; - - // The result is a quiet NaN if (an "Invalid Operation" exception occurs) - // or (A is a NaN) or (B is a NaN). - assign ZSNaN = Invalid | ANaN | BNaN; - - // The result is zero - assign Zero = (AZero | BInf)&~op_type | AZero&op_type; - - // The result is +Inf if ((A is Inf) or (B is 0)) and (the - // result is not a quiet NaN). - assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN; - - // Set the type of the result as follows: - // Ztype Result - // 000 Normal - // 010 Infinity - // 011 Zero - // 110 Div by 0 - // 111 SNaN - assign Ztype[2] = (ZSNaN); - assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf); - assign Ztype[0] = (ZSNaN) | (Zero); - -endmodule // exception diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index a1a934ffe..6c7ab451f 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -1,4 +1,31 @@ - +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: classify unit +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fclassify ( diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 3d8383ce0..9c6757848 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -1,4 +1,32 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Comparison unit +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" // FOpCtrlE values diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index f6ed650af..5c553e864 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: control unit +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fctrl ( diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 26ca7dd83..b9932523a 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -1,22 +1,51 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Floating point conversions of configurable size +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module fcvt ( - input logic XSgnE, // input's sign - input logic [`NE-1:0] XExpE, // input's exponent - input logic [`NF:0] XManE, // input's fraction - input logic [`XLEN-1:0] ForwardedSrcAE, // integer input - from IEU - input logic [2:0] FOpCtrlE, // choose which opperation (look below for values) - input logic FWriteIntE, // is fp->int (since it's writting to the integer register) - input logic XZeroE, // is the input zero - input logic XDenormE, // is the input denormalized - input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`NE:0] CvtCalcExpE, // the calculated expoent - output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by - output logic CvtResDenormUfE,// does the result underflow or is denormalized - output logic CvtResSgnE, // the result's sign - output logic IntZeroE, // is the integer zero? - output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) + input logic Xs, // input's sign + input logic [`NE-1:0] Xe, // input's exponent + input logic [`NF:0] Xm, // input's fraction + input logic [`XLEN-1:0] Int, // integer input - from IEU + input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic XZero, // is the input zero + input logic XDenorm, // is the input denormalized + input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + output logic [`NE:0] Ce, // the calculated expoent + output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by + output logic ResDenormUf,// does the result underflow or is denormalized + output logic Cs, // the result's sign + output logic IntZero, // is the integer zero? 
+ output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -29,9 +58,6 @@ module fcvt ( // bit 2 bit 1 bit 0 // for example: signed long -> single floating point has the OpCode 101 - // (FF) fp -> fp coversion signals - // (IF) int -> fp coversion signals - // (FI) fp -> int coversion signals logic [`FMTBITS-1:0] OutFmt; // format of the output @@ -42,23 +68,22 @@ module fcvt ( logic Signed; // is the opperation with a signed integer? logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? - logic ToInt; // is the opperation an fp->int conversion? - logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC + logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) + logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC // seperate OpCtrl for code readability - assign Signed = FOpCtrlE[0]; - assign Int64 = FOpCtrlE[1]; - assign IntToFp = FOpCtrlE[2]; - assign ToInt = FWriteIntE; + assign Signed = FOpCtrl[0]; + assign Int64 = FOpCtrl[1]; + assign IntToFp = FOpCtrl[2]; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output - // - int -> fp: FmtE contains the percision of the output + // - int -> fp: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp ? FmtE : (FOpCtrlE[1:0] == `FMT); + assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp ? FmtE : FOpCtrlE[1:0]; + assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////// @@ -67,9 +92,9 @@ module fcvt ( // 1) negate the input if the input is a negitive singed integer // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE; + assign PosInt = Cs ? 
-Int : Int; assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; - assign IntZeroE = ~|TrimInt; + assign IntZero = ~|TrimInt; /////////////////////////////////////////////////////////////////////////// // lzc @@ -78,10 +103,11 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} : - {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; + assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {Xm, {`CVTLEN-`NF{1'b0}}}; + assign LzcIn = LzcInFull[`CVTLEN-1:0]; - lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt); + lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); /////////////////////////////////////////////////////////////////////////// // shifter @@ -95,13 +121,13 @@ module fcvt ( // denormalized/undeflowed result fp -> fp: // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 // ??? -> fp: - // - shift left by ZeroCnt+1 - to shift till the result is normalized + // - shift left by LeadingZeros - to shift till the result is normalized // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} : - CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : - (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}}; + assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : + ResDenormUf&~IntToFp ? 
(`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : + (LeadingZeros); /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -151,15 +177,15 @@ module fcvt ( // select the old exponent // int -> fp : largest bias + XLEN // fp -> ??? : XExp - assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : XExpE; + assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe; // calculate CalcExp // fp -> fp : - // - XExp - Largest bias + new bias - (ZeroCnt+1) + // - XExp - Largest bias + new bias - (LeadingZeros+1) // only do ^ if the input was denormalized // - convert the expoenent to the final preciaion (Exp - oldBias + newBias) - // - correct the expoent when there is a normalization shift ( + ZeroCnt+1) - // fp -> int : XExp - Largest Bias + 1 - (ZeroCnt+1) + // - correct the expoent when there is a normalization shift ( + LeadingZeros+1) + // fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1) // | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp // process: // - start @@ -173,18 +199,18 @@ module fcvt ( // | 0's | Mantissa | 0's if nessisary | // | keep | // - // - if the input is denormalized then we dont shift... so the "- (ZeroCnt+1)" is just leftovers from other options - // int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt + // - if the input is denormalized then we dont shift... 
so the "- LeadingZeros" is just leftovers from other options + // int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros // Process: // - shifted right by XLEN (XLEN) - // - shift left to normilize (-1-ZeroCnt) + // - shift left to normilize (-LeadingZeros) // - newBias to make the biased exponent - // - assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})}; + // oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp) + assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion - assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp; + assign ResDenormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; /////////////////////////////////////////////////////////////////////////// @@ -196,7 +222,7 @@ module fcvt ( // - if 64-bit : check the msb of the 64-bit integer input and if it's signed // - if 32-bit : check the msb of the 32-bit integer input and if it's signed // - otherwise: the floating point input's sign - assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE; + assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs; endmodule diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index a425ad9b4..6b1bc6381 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -1,32 +1,60 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Post-Processing flag calculation +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module flags( - input logic XSgnM, - input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs - input logic XInfM, YInfM, ZInfM, // inputs are infinity + input logic Xs, + input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs + input logic XInf, YInf, ZInf, // inputs are infinity input logic Plus1, input logic InfIn, // is a Inf input being used - input logic XZeroM, YZeroM, // inputs are zero - input logic XNaNM, YNaNM, // inputs are NaN input logic NaNIn, // is a NaN input being used + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic XZero, YZero, // inputs are zero + input logic XNaN, YNaN, // inputs are NaN input logic Sqrt, // Sqrt? 
input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point input logic Int64, // convert to 64 bit integer input logic Signed, // convert to a signed integer - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`NE:0] CvtCalcExpM, // the calculated expoent - Cvt + input logic [`NE:0] CvtCe, // the calculated expoent - Cvt input logic CvtOp, // conversion opperation? input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? - input logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow - input logic [`NE+1:0] RoundExp, // exponent of the normalized sum - input logic [1:0] NegResMSBS, // the negitive integer result's most significant bits - input logic ZSgnEffM, PSgnM, // the product and modified Z signs - input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding + input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow + input logic [`NE+1:0] Me, // exponent of the normalized sum + input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits + input logic FmaAs, FmaPs, // the product and modified Z signs + input logic R, UfL, S, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res - output logic [4:0] PostProcFlgM // flags + output logic [4:0] PostProcFlg // flags ); logic SigNaN; // is an input a signaling NaN logic Inexact; // inexact flag @@ -36,7 +64,7 @@ module flags( logic DivInvalid; // integer invalid flag logic Underflow; // Underflow flag logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent - logic ShiftGtIntSz; // is the shift greater than the the integer size (use ResExp to account for possible roundning "shift") + logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible roundning "shift") 
/////////////////////////////////////////////////////////////////////////////// // Flags @@ -45,30 +73,30 @@ module flags( if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); + assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]); + `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); default: ResExpGteMax = 1'bx; endcase - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 4) begin always_comb case (OutFmt) - `Q_FMT: 
ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE]; - `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]); + `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; + `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); + `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); + `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); endcase // a left shift of intlen+1 is still in range but any more than that is an overflow // inital: | 64 0's | XLEN | @@ -82,14 +110,14 @@ module flags( // - any of the bits after the most significan 1 is one // - the most signifcant in 65 or 33 is still a one in the number and // one of the later bits is one - assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end // if the result is greater than or equal to the max exponent(not taking into account sign) // | and the exponent isn't negitive // | | if the input isnt infinity or NaN // | | | - assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero); + assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); // detecting tininess after rounding // the exponent is negitive @@ -99,16 +127,16 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & 
~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) // | | - assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid; + assign IntInexact = ((CvtCe[`NE]&~XZero)|S|R)&~IntInvalid; // select the inexact flag to output assign Inexact = ToInt ? IntInexact : FpInexact; @@ -125,22 +153,22 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]); + assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range - assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp); - assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); - assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt); + assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); + assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf); + assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); // if dividing by zero and not 0/0 // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) - assign DivByZero = 
YZeroM&DivOp&~(XZeroM|NaNIn|InfIn); + assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn); // Combine flags // - to integer results do not set the underflow or overflow flags - assign PostProcFlgM = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact}; + assign PostProcFlg = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact}; endmodule diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 88e0a96e7..44cd3616a 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, David Harris +// Written: me@KatherineParry.com, David Harris // Modified: 6/23/2021 // // Purpose: Floating point multiply-accumulate of configurable size @@ -30,28 +30,27 @@ `include "wally-config.vh" module fma( - input logic XSgnE, YSgnE, ZSgnE, // input's signs - input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format - input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format - input logic XZeroE, YZeroE, ZZeroE, // is the input zero - input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign - output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic [3*`NF+5:0] SumE, // the positive sum - output logic NegSumE, // was the sum negitive - output logic InvZE, // intert Z - output logic ZSgnEffE, // the modified Z sign - output logic PSgnE, // the product's sign - output logic [$clog2(3*`NF+7)-1:0] 
FmaNormCntE // normalization shift cnt + input logic Xs, Ys, Zs, // input's signs + input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format + input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format + input logic XZero, YZero, ZZero, // is the input zero + input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad + output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign + output logic ZmSticky, // sticky bit that is calculated during alignment + output logic KillProd, // set the product to zero before addition if the product is too small to matter + output logic [3*`NF+5:0] Sm, // the positive sum's significand + output logic NegSum, // was the sum negitive + output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) + output logic As, // the aligned addend's sign (modified Z sign for other opperations) + output logic Ps, // the product's sign + output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); - logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format - logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1) - logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted - logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed - logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum + logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format + logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) + logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted + logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed /////////////////////////////////////////////////////////////////////////////// // 
Calculate the product // - When multipliying two fp numbers, add the exponents @@ -62,46 +61,41 @@ module fma( // calculate the product's exponent - expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, - .ProdExpE); + expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe); // multiplication of the mantissa's - mult mult(.XManE, .YManE, .ProdManE); + mult mult(.Xm, .Ym, .Pm); /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - - align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE, - .AlignedAddendE, .AddendStickyE, .KillProdE); - // calculate the signs and take the opperation into account - sign sign(.FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .PSgnE, .ZSgnEffE); + sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + + align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + .Am, .ZmSticky, .KillProd); + + // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); - loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE); - - // Choose the positive sum and accompanying LZA result. - assign SumE = NegSumE ? 
NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; - - + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule module expadd( - input logic [`FMTBITS-1:0] FmtE, // precision - input logic [`NE-1:0] XExpE, YExpE, // input exponents - input logic XZeroE, YZeroE, // are the inputs zero - output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2 + input logic [`FMTBITS-1:0] Fmt, // format of the output: single double half quad + input logic [`NE-1:0] Xe, Ye, // input's exponents + input logic XZero, YZero, // are the inputs zero + output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 ); // kill the exponent if the product is zero - either X or Y is 0 - assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}}; + assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}}; endmodule @@ -110,10 +104,10 @@ endmodule module mult( - input logic [`NF:0] XManE, YManE, - output logic [2*`NF+1:0] ProdManE + input logic [`NF:0] Xm, Ym, + output logic [2*`NF+1:0] Pm ); - assign ProdManE = XManE * YManE; + assign Pm = Xm * Ym; endmodule @@ -124,19 +118,19 @@ endmodule module sign( - input logic [2:0] FOpCtrlE, // precision - input logic XSgnE, YSgnE, ZSgnE, // are the inputs denormalized - output logic PSgnE, // the product's sign - takes opperation into account - output logic ZSgnEffE // Z sign used in fma - takes opperation into account + input logic [2:0] FOpCtrl, // opperation contol + input logic Xs, Ys, Zs, // sign of the inputs + output logic Ps, // the product's sign - takes opperation into account + output logic As // aligned addend sign used in fma - takes opperation into account ); // Calculate the product's sign // Negate product's sign if FNMADD or FNMSUB // flip is negation opperation - assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]); + assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); // flip if subtraction - 
assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; + assign As = Zs^FOpCtrl[0]; endmodule @@ -148,18 +142,18 @@ endmodule module align( - input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format - input logic [`NF:0] ZManE, // fractions in U(0.NF) format] - input logic XZeroE, YZeroE, ZZeroE, // is the input zero - input logic [`NE+1:0] ProdExpE, // the product's exponent - output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) - output logic AddendStickyE, // Sticky bit calculated from the aliged addend - output logic KillProdE // should the product be set to zero + input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format + input logic [`NF:0] Zm, // significand in U(0.NF) format] + input logic XZero, YZero, ZZero, // is the input zero + output logic [3*`NF+5:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic ZmSticky, // Sticky bit calculated from the aliged addend + output logic KillProd // should the product be set to zero ); - logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) + logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*`NF+5:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+5:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic KillZ; /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -168,16 +162,19 @@ module align( // determine the shift count for alignment // - negitive means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right - // *** can we use ProdExpE instead of XExp/YExp to save an adder? 
DH 5/12/22 - // KP- yes we used ProdExpE originally but we did this for timing - assign AlignCnt = XZeroE|YZeroE ? -(`NE+2)'($unsigned(1)) : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF)+3 - {2'b0, ZExpE}; + // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze}; // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {ZManE,(3*`NF+5)'(0)}; + assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; + + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); + always_comb begin @@ -185,18 +182,9 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | - if ($signed(AlignCnt) < $signed((`NE+2)'(0))) begin - KillProdE = 1; - ZManShifted = ZManPreShifted; - AddendStickyE = ~(XZeroE|YZeroE); - - // If the Addend is shifted right - // | 54'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if ($signed(AlignCnt)<=$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5))) begin - KillProdE = 0; - ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[`NF-1:0]); + if (KillProd) begin + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; + ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition // - The addend has to shift two past the end of the addend to be considered too small @@ -204,15 +192,21 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | + end else if (KillZ) begin + ZmShifted = 0; + ZmSticky = ~ZZero; + + // If the Addend is shifted right + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | end else begin - KillProdE = 0; - ZManShifted = 0; - AddendStickyE = ~ZZeroE; + ZmShifted = ZmPreshifted >> 
ACnt; + ZmSticky = |(ZmShifted[`NF-1:0]); end end - assign AlignedAddendE = ZManShifted[4*`NF+5:`NF]; + assign Am = ZmShifted[4*`NF+5:`NF]; endmodule @@ -223,17 +217,18 @@ endmodule module add( - input logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) - input logic [2*`NF+1:0] ProdManE, // the product's mantissa - input logic PSgnE, ZSgnEffE,// the product and modified Z signs - input logic KillProdE, // should the product be set to 0 - input logic XZeroE, YZeroE, // is the input zero - output logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed - output logic NegSumE, // was the sum negitive - output logic InvZE, // do you invert Z - output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum + input logic [3*`NF+5:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [2*`NF+1:0] Pm, // the product's mantissa + input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic KillProd, // should the product be set to 0 + input logic ZmSticky, + output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed + output logic NegSum, // was the sum negitive + output logic InvA, // do you invert the aligned addend + output logic [3*`NF+5:0] Sm // the positive sum ); + logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition @@ -242,30 +237,33 @@ module add( // Negate Z when doing one of the following opperations: // -prod + Z // prod - Z - assign InvZE = ZSgnEffE ^ PSgnE; + assign InvA = As ^ Ps; // Choose an inverted or non-inverted addend - the one has to be added now for the LZA - assign AlignedAddendInv = InvZE ? 
{1'b1, ~AlignedAddendE} : {1'b0, AlignedAddendE}; + assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}}; - - - + assign PmKilled = Pm&{2*`NF+2{~KillProd}}; // Do the addition // - calculate a positive and negitive sum in parallel - assign PreSum = AlignedAddendInv + {{`NF+3{1'b0}}, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; - assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)}; + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive - assign NegSumE = PreSum[3*`NF+6]; + assign NegSum = PreSum[3*`NF+6]; + // Choose the positive sum and accompanying LZA result. + assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; endmodule -module loa( //https://ieeexplore.ieee.org/abstract/document/930098 +module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. 
Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+1:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result + input logic [2*`NF+3:0] P, // product + output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -276,12 +274,9 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; assign G[3*`NF+6:2*`NF+4] = 0; assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; - assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; - assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; - assign T[1:0] = A[1:0]; - assign G[1:0] = 0; - assign Z[1:0] = ~A[1:0]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; // Apply function to determine Leading pattern @@ -293,6 +288,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE)); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 2a2417281..a6c1a1c60 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -1,16 +1,43 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Fma shift calculation +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module fmashiftcalc( - input logic [3*`NF+5:0] SumM, // the positive sum - input logic [`NE-1:0] ZExpM, // exponent of Z - input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias - input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count - input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single - input logic KillProdM, // is the product set to zero - input logic ZDenormM, - output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic SumZero, // is the result denormalized - calculated before LZA corection - output logic PreResultDenorm, // is the result denormalized - calculated before LZA corection + input logic [3*`NF+5:0] FmaSm, // the positive sum + input logic [`NE-1:0] Ze, // exponent of Z + input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias + input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic FmaKillProd, // is the product set to zero + output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + output logic FmaSZero, // is the result denormalized - calculated before LZA corection + output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); @@ -22,35 +49,36 @@ module fmashiftcalc( /////////////////////////////////////////////////////////////////////////////// //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero - assign SumZero = ~(|SumM); + assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign NormSumExp = KillProdM ? 
{2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4); + // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 + assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); - //convert the sum's exponent into the propper percision + //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign ConvNormSumExp = NormSumExp; + assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; end else if (`FPSIZES == 3) begin always_comb begin - case (FmtM) - `FMT: ConvNormSumExp = NormSumExp; - `FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: ConvNormSumExp = {`NE+2{1'bx}}; + case (Fmt) + `FMT: FmaNe = NormSumExp; + `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; + default: FmaNe = {`NE+2{1'bx}}; endcase end end else if (`FPSIZES == 4) begin always_comb begin - case (FmtM) - 2'h3: ConvNormSumExp = NormSumExp; - 2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + case (Fmt) + 2'h3: FmaNe = NormSumExp; + 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; + 2'h0: FmaNe = 
(NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; + 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; endcase end @@ -62,7 +90,7 @@ module fmashiftcalc( logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; + assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; @@ -70,7 +98,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero; + assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? 
Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -81,11 +109,11 @@ module fmashiftcalc( assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; always_comb begin - case (FmtM) - `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; - `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; - `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; - default: PreResultDenorm = 1'bx; + case (Fmt) + `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + default: FmaPreResultDenorm = 1'bx; endcase end @@ -100,12 +128,12 @@ module fmashiftcalc( assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; always_comb begin - case (FmtM) - 2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; - 2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; - 2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; - 2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero; - endcase + case (Fmt) + 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; + endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking end end @@ -116,13 +144,13 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero; + // 
assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = PreResultDenorm&~KillProdM ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized - assign FmaShiftIn = {3'b0, SumM}; - assign FmaShiftAmt = (FmaNormCntM&{$clog2(3*`NF+7){~KillProdM}})+DenormShift; + assign FmaShiftIn = {3'b0, FmaSm}; + assign FmaShiftAmt = FmaNCnt+DenormShift; endmodule diff --git a/pipelined/src/fpu/fpdiv.sv b/pipelined/src/fpu/fpdiv.sv deleted file mode 100755 index 127a7686c..000000000 --- a/pipelined/src/fpu/fpdiv.sv +++ /dev/null @@ -1,132 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Floating point divider/square root top unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -// `timescale 1ps/1ps -module fpdiv ( - input logic clk, - input logic reset, - input logic start, - input logic [63:0] op1, - input logic [63:0] op2, - input logic [1:0] rm, - input logic op_type, - input logic P, - input logic OvEn, - input logic UnEn, - input logic XNaNQ, - input logic YNaNQ, - input logic XZeroQ, - input logic YZeroQ, - input logic XInfQ, - input logic YInfQ, - - output logic done, - output logic FDivBusyE, - output logic [63:0] AS_Result, - output logic [4:0] Flags); - - logic [63:0] Float1; - logic [63:0] Float2; - - logic [12:0] exp1, exp2, expF; - logic [12:0] exp_diff, bias; - logic [13:0] exp_sqrt; - logic [63:0] Result; - logic [52:0] mantissaA; - logic [52:0] mantissaB; - - logic [2:0] sel_inv; - logic Invalid; - logic [4:0] FlagsIn; - logic signResult; - - logic [59:0] q1, qm1, qp1, q0, qm0, qp0; - logic [59:0] rega_out, regb_out, regc_out, regd_out; - logic [119:0] regr_out; - logic [2:0] sel_muxa, sel_muxb; - logic sel_muxr; - logic load_rega, load_regb, load_regc, load_regd, load_regr; - - logic load_regs; - logic exp_cout1, exp_cout2; - logic exp_odd, open; - - // op_type : fdiv=0, fsqrt=1 - assign Float1 = op1; - assign Float2 = op_type ? 
op1 : op2; - - // Exception detection - exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); - - // Determine Sign/Mantissa - assign signResult = (Float1[63]^Float2[63]); - assign mantissaA = {1'b1, Float1[51:0]}; - assign mantissaB = {1'b1, Float2[51:0]}; - // Perform Exponent Subtraction - expA - expB + Bias - assign exp1 = {2'b0, Float1[62:52]}; - assign exp2 = {2'b0, Float2[62:52]}; - assign bias = {3'h0, 10'h3FF}; - // Divide exponent - assign {exp_cout1, open, exp_diff} = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; - - // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? 1'b0 : 1'b1; - assign {exp_cout2, exp_sqrt} = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; - // Choose correct exponent - assign expF = op_type ? exp_sqrt[13:1] : exp_diff; - - // Main Goldschmidt/Division Routine - divconv goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, .rega_out, .regb_out, .regc_out, .regd_out, - .regr_out, .d(mantissaB), .n(mantissaA), .sel_muxa, .sel_muxb, .sel_muxr, - .reset, .clk, .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .P, .op_type, .exp_odd); - - // FSM : control divider - fsm_fpdiv control (.clk, .reset, .start, .op_type, - .done, .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, - .divBusy(FDivBusyE)); - - // Round the mantissa to a 52-bit value, with the leading one - // removed. The rounding units also handles special cases and - // set the exception flags. - rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), - .sel_inv, .Invalid, .SignR(signResult), - .Float1(op1), .Float2(op2), - .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, - .XInfQ, .YInfQ, .op_type, - .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, - .Result, .Flags(FlagsIn)); - - // Store the final result and the exception flags in registers. 
- flopenr #(64) rega (clk, reset, done, Result, AS_Result); - flopenr #(5) regc (clk, reset, done, FlagsIn, Flags); - -endmodule // fpadd - diff --git a/pipelined/src/fpu/fpdiv_pipe.sv b/pipelined/src/fpu/fpdiv_pipe.sv deleted file mode 100755 index 8e4bd2399..000000000 --- a/pipelined/src/fpu/fpdiv_pipe.sv +++ /dev/null @@ -1,170 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Floating point divider/square root top unit pipelined version (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module fpdiv_pipe ( - input logic clk, - input logic reset, - input logic start, - input logic [63:0] op1, - input logic [63:0] op2, - input logic [1:0] rm, - input logic op_type, - input logic P, - input logic OvEn, - input logic UnEn, - input logic XNaNQ, - input logic YNaNQ, - input logic XZeroQ, - input logic YZeroQ, - input logic XInfQ, - input logic YInfQ, - - output logic done, - output logic FDivBusyE, - output logic load_preload, - output logic [63:0] AS_Result, - output logic [4:0] Flags); - - supply1 vdd; - supply0 vss; - - logic [63:0] Float1; - logic [63:0] Float2; - logic [63:0] IntValue; - - logic [12:0] exp1, exp2, expF; - logic [14:0] exp_pre_diff; - logic [12:0] exp_diff, bias; - logic [13:0] exp_sqrt; - - logic [63:0] Result; - logic [52:0] mantissaA; - logic [52:0] mantissaB; - - logic [2:0] sel_inv; - logic Invalid; - logic [4:0] FlagsIn; - logic exp_gt63; - logic Sticky_out; - logic signResult, sign_corr; - logic corr_sign; - logic zeroB; - logic convert; - logic swap; - logic sub; - - logic [59:0] q1, qm1, qp1, q0, qm0, qp0; - logic [59:0] rega_out, regb_out, regc_out, regd_out; - logic [119:0] regr_out; - logic [2:0] sel_muxa, sel_muxb; - logic sel_muxr; - logic load_rega, load_regb, load_regc, load_regd, load_regr; - logic load_regp, load_regs; - - logic exp_odd, exp_odd1; - logic start1; - logic P1; - logic op_type1; - logic [12:0] expF1; - logic [52:0] mantissaA1; - logic [52:0] mantissaB1; - logic [2:0] sel_inv1; - logic signResult1; - logic Invalid1; - - // op_type : fdiv=0, fsqrt=1 - assign Float1 = op1; - assign Float2 = op_type ? 
op1 : op2; - - // Exception detection - exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid); - - // Determine Sign/Mantissa - assign signResult = ((Float1[63]^Float2[63])&~op_type); - assign mantissaA = {vdd, Float1[51:0]}; - assign mantissaB = {vdd, Float2[51:0]}; - - // Perform Exponent Subtraction - expA - expB + Bias - assign exp1 = {2'b0, Float1[62:52]}; - assign exp2 = {2'b0, Float2[62:52]}; - // bias : DP = 2^{11-1}-1 = 1023 - assign bias = {3'h0, 10'h3FF}; - // Divide exponent - assign exp_pre_diff = {2'b0, exp1} - {2'b0, exp2} + {2'b0, bias}; - assign exp_diff = exp_pre_diff[12:0]; - - // Sqrt exponent (check if exponent is odd) - assign exp_odd = Float1[52] ? 1'b0 : 1'b1; - assign exp_sqrt = {1'b0, exp1} + {4'h0, 10'h3ff} + {13'b0, exp_odd}; - // Choose correct exponent - assign expF = op_type ? exp_sqrt[13:1] : exp_diff; - - flopenr #(1) rega (clk, reset, 1'b1, exp_odd, exp_odd1); - flopenr #(1) regb (clk, reset, 1'b1, P, P1); - flopenr #(1) regc (clk, reset, 1'b1, op_type, op_type1); - flopenr #(13) regd (clk, reset, 1'b1, expF, expF1); - flopenr #(53) rege (clk, reset, 1'b1, mantissaA, mantissaA1); - flopenr #(53) regf (clk, reset, 1'b1, mantissaB, mantissaB1); - flopenr #(1) regg (clk, reset, 1'b1, start, start1); - flopenr #(3) regh (clk, reset, 1'b1, sel_inv, sel_inv1); - flopenr #(1) regj (clk, reset, 1'b1, signResult, signResult1); - flopenr #(1) regk (clk, reset, 1'b1, Invalid, Invalid1); - - // Main Goldschmidt/Division Routine - divconv_pipe goldy (.q1, .qm1, .qp1, .q0, .qm0, .qp0, - .rega_out, .regb_out, .regc_out, .regd_out, - .regr_out, .d(mantissaB1), .n(mantissaA1), - .sel_muxa, .sel_muxb, .sel_muxr, .reset, .clk, - .load_rega, .load_regb, .load_regc, .load_regd, - .load_regr, .load_regs, .load_regp, - .P(P), .op_type(op_type1), .exp_odd(exp_odd1)); - - // FSM : control divider - fsm_fpdiv_pipe control (.clk, .reset, .start(start), .op_type(op_type1), .P(P), - .done, .load_rega, .load_regb, .load_regc, 
.load_regd, - .load_regr, .load_regs, .load_regp, .load_preload, - .sel_muxa, .sel_muxb, .sel_muxr, .divBusy(FDivBusyE)); - - // Round the mantissa to a 52-bit value, with the leading one - // removed. The rounding units also handles special cases and - // set the exception flags. - rounder_div round1 (.rm, .P(P), .OvEn(1'b0), .UnEn(1'b0), .exp_diff(expF1), - .sel_inv(sel_inv1), .Invalid(Invalid1), .SignR(signResult1), - .Float1(op1), .Float2(op2), - .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, - .XInfQ, .YInfQ, .op_type(op_type1), - .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, - .Result, .Flags(FlagsIn)); - - // Store the final result and the exception flags in registers. - flopenr #(64) regl (clk, reset, done, Result, AS_Result); - flopenr #(5) regn (clk, reset, done, FlagsIn, Flags); - -endmodule // fpdiv_pipe - diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 25b39d69b..65be29972 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, James Stine, Brett Mathis +// Written: me@KatherineParry.com, James Stine, Brett Mathis // Modified: 6/23/2021 // // Purpose: FPU @@ -42,7 +42,7 @@ module fpu ( input logic [1:0] STATUS_FS, // Is floating-point enabled? output logic FRegWriteM, // FP register write enable output logic FpLoadStoreM, // Fp load instruction? 
- output logic FLoad2, + output logic FStore2, output logic FStallD, // Stall the decode stage output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory @@ -110,7 +110,7 @@ module fpu ( logic [`NE+1:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; logic KillProdE, KillProdM; - logic InvZE, InvZM; + logic InvAE, InvAM; logic NegSumE, NegSumM; logic ZSgnEffE, ZSgnEffM; logic PSgnE, PSgnM; @@ -125,11 +125,11 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN+2:0] Quot; - logic [`NE+1:0] DivCalcExpM; - logic DivNegStickyM; - logic DivStickyM; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M; + logic [`QLEN-1-(`RADIX/4):0] QuotM; + logic [`NE+1:0] DivCalcExpE, DivCalcExpM; + logic DivStickyE, DivStickyM; + logic DivDoneM; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -185,9 +185,10 @@ module fpu ( flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); - flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); + flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); // EXECUTION STAGE @@ -249,47 +250,55 @@ module fpu ( .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); // fma - does multiply, add, and multiply-add instructions - fma 
fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, - .XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, - .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE, - .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); - - // fpdivsqrt using Goldschmidt's iteration - if(`FLEN == 64) begin - flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - end - else if (`FLEN == 32) begin - flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - end - flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), + .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), + .Xm(XManE), .Ym(YManE), .Zm(ZManE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), + .FOpCtrl(FOpCtrlE), .Fmt(FmtE), + .As(ZSgnEffE), .Ps(PSgnE), + .Sm(SumE), .Pe(ProdExpE), + .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), + .ZmSticky(AddendStickyE), .KillProd(KillProdE)); + // // fpdivsqrt using Goldschmidt's iteration + // if(`FLEN == 64) begin + // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), + // 
.clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // end + // else if (`FLEN == 32) begin + // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // end + // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), + // .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), + // .clear(FDivSqrtDoneE), .en(load_preload), + // .reset(reset), .clk(clk)); + // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), + // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, + // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), + .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal + .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE); - fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, - .FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE, - .CvtShiftAmtE, 
.CvtResDenormUfE, .CvtResSgnE, .IntZeroE, - .CvtLzcInE); + fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), + .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), + .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), + .LzcIn(CvtLzcInE)); // data to be stored in memory - to IEU // - FP uses NaN-blocking format @@ -298,8 +307,8 @@ module fpu ( assign FWriteDataE = FSrcYE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; - if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; - else assign FLoad2 = FmtM; + if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; + else assign FStore2 = FmtM; if (`FPSIZES==1) assign FWriteDataE = FSrcYE; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; @@ -351,8 +360,8 @@ module fpu ( flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, - {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -371,12 +380,12 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M, - .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, - .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, - .NegSumM, .InvZM, 
.ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM, - .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM, - .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM); + postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), + .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), + .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); diff --git a/pipelined/src/fpu/fpudivsqrtrecur.sv b/pipelined/src/fpu/fpudivsqrtrecur.sv deleted file mode 100644 index 62a441367..000000000 --- a/pipelined/src/fpu/fpudivsqrtrecur.sv +++ /dev/null @@ -1,74 +0,0 @@ -/////////////////////////////////////////// -// -// Written: David Harris -// Modified: 11 September 2021 -// -// Purpose: Recurrence-based SRT Division and Square Root -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module fpudivsqrtrecur ( - input logic clk, - input logic reset, - input logic FlushM, // flush the memory stage - input logic StallM, // stall memory stage - input logic FDivSqrtStart, // start a computation - input logic FmtE, // precision 1 = double 0 = single - input logic FDivE, FSqrtE, - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic XSgnE, YSgnE, // input signs - execute stage - input logic [`NE-1:0] XExpE, YExpE, // input exponents - execute stage - input logic [`NF:0] XManE, YManE, // input mantissa - execute stage - input logic XDenormE, YDenormE, // is denorm - input logic XZeroE, YZeroE, // is zero - execute stage - input logic XNaNE, YNaNE, // is NaN - input logic XSNaNE, YSNaNE, // is signaling NaN - input logic XInfE, YInfE, ZInfE, // is infinity - input logic [10:0] BiasE, // bias (max exponent/2) ***parameterize in unpacking unit - output logic FDviSqrtBusy, FDivSqrtDone, //currently occpied, or done with operation - output logic [`FLEN-1:0] FDivSqrtResM, // result - output logic [4:0] FDivSqrtFlgM // flags - ); - - logic FDivSqrtResSgn; - logic [`FLEN-1:0] FDivSqrtRecurRes; - - // Radix-2 SRT Division and Square Root - - // Special Cases - // *** shift to handle denorms in hardware - - assign FDivSqrtResSgn = FDivE & (XSgnE ^ YSgnE); // Sign is negative for division if inputs have opposite signs - - always_comb begin - if (FSqrtE & XSgnE | FDivE & XZeroE & YZeroE | XNaNE | FDivE & YNaNE) FDivSqrtResM = 0; // ***replace with NAN; // *** which one - else if (FDivE & YZeroE | XInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(1), (`NF)'(0)}; // infinity - else if (FDivE & YInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(0), (`NF)'(0)}; // zero - else FDivSqrtResM = FDivSqrtRecurRes; - 
end - - // *** handle early termination in the special cases - // *** handle signaling NANs -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fpudivsqrtrecurcore.sv b/pipelined/src/fpu/fpudivsqrtrecurcore.sv deleted file mode 100644 index b13ef0da2..000000000 --- a/pipelined/src/fpu/fpudivsqrtrecurcore.sv +++ /dev/null @@ -1,105 +0,0 @@ -/////////////////////////////////////////// -// -// Written: David Harris -// Modified: 11 September 2021 -// -// Purpose: Recurrence-based SRT Division and Square Root -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -// Bit counts: -// Inputs are originally normalized floating point numbers with NF fractional bits and a leading 1 integer bit -// x is right shifted by up to 2 to be in the range of 1/4 <= x < 1/2 for divide, 1/4 <= x < 1 for sqrt -// Hence, x now has NF+2 fractional bits and 0 integer bits -// d is right shifted by 1 to be in the range of 1/2 <= d < 1. It thus has NF+1 fractional bits and 0 integer bits -// q is eventually in the range of 1/4 < q < 1 and hence needs NF+2 bits to keep NF bits when normalized, plus some*** more bits for rounding -// The partial - -/* -module fpudivsqrtrecurcore ( - input logic clk, - input logic reset, - input logic start, // start a computation - input logic busy, // computation running - input logic fmt, // precision 1 = double 0 = single - input logic [`NF+1:0] x, // in range 1/4 <= x < 1/2 for divide, 1/4 <=x < 1 for sqrt - input logic [`NF+1:0] din, // in range 1/2 <= d < 1 for divide - input logic FDiv, FSqrt, // *** not yet used - output logic [`FLEN-1:0] FDivSqrtRecurRes // result - ); - - assign FDivSqrtRecurRes = 0; - - logic [***] d, ws, wsout, wsnext, wc, wcout, wcnext; - logic [1:0] q; // 00 = 0, 01 = 1, 10 = -1 - - // Radix-2 SRT Division - - // registers for divisor and partial remainder - flopen #(NF+1) dreg(clk, start, din, d); - mux2 #(NF+1) wsmux(wsout, x, start, wsnext); - flopen #(NF+1) wsreg(clk, busy, wsnext, ws); - mux2 #(NF+1) wcmux(wcout, 0, start, wcnext); - flopen #(NF+1) wcreg(clk, busy, wcnext, wc); - - // quotient selection - qsel qsel(ws[***4bits], wc[***], q); - - // partial remainder update - always_comb begin // select -d * q to add to partial remainder - if (q[1]) dq = d; - else if (q[0]) dq = ~d; - else dq = 0; - end - csa #(***) csa(ws, wc, dq, q[1], wsout, wcout); - - -endmodule -*/ - -/* -module csa #(parameter N=4) ( - input logic [N-1:0] sin, cin, ain, - input 
logic carry, - output logic [N-1:0] sum, cout -); - - logic [N-1:0] c; - - assign c = {cin[N-2:0], carry}; // shift carries left and inject optional 1 into lsb - assign sum = sin ^ ain ^ c; - assign cout = sin & ain | sin & c | ain & c; -endmodule -*/ - -module qsel( // radix 2 SRT division quotient selection - input logic [3:0] wc, ws, - output logic [1:0] q -); - -endmodule - - - diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 0d08d31fe..17d15669f 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: Katherine Parry +// Written: me@KatherineParry.com // Modified: 6/23/2021 // // Purpose: FPU Sign Injection instructions diff --git a/pipelined/src/fpu/fsm_fpdiv.sv b/pipelined/src/fpu/fsm_fpdiv.sv deleted file mode 100755 index 37e6fe2ed..000000000 --- a/pipelined/src/fpu/fsm_fpdiv.sv +++ /dev/null @@ -1,537 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 9/28/2021 -// -// Purpose: FSM for floating point divider/square root unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module fsm_fpdiv ( - input logic clk, - input logic reset, - input logic start, - input logic op_type, - output logic done, - output logic load_rega, - output logic load_regb, - output logic load_regc, - output logic load_regd, - output logic load_regr, - output logic load_regs, - output logic [2:0] sel_muxa, - output logic [2:0] sel_muxb, - output logic sel_muxr, - output logic divBusy - ); - - typedef enum logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, - S30} statetype; - - statetype current_state, next_state; - - always @(posedge clk) - begin - if (reset == 1'b1) - current_state = S0; - else - current_state = next_state; - end - - always_comb - begin - case(current_state) - S0: // iteration 0 - begin - if (start==1'b0) - begin - done = 1'b0; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - else if (start==1'b1 & op_type==1'b0) - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b001; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S1; - end // if (start==1'b1 & 
op_type==1'b0) - else if (start==1'b1 & op_type==1'b1) - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S13; - end - else - begin - done = 1'b0; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - end // case: S0 - S1: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S2; - end - S2: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S3; - end - S3: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S4; - end - S4: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S5; - end - S5: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; // add - next_state = S6; - end - S6: // iteration 3 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; 
- load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S8; - end - S7: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S8; - end // case: S7 - S8: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S9; - end - S9: // rem - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b1; - next_state = S10; - end - S10: // done - begin - done = 1'b1; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S11; - end // case: S10 - S11: // done - begin - done = 1'b0; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - S13: // start of sqrt path - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b010; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S14; - end - S14: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b001; - sel_muxb = 3'b100; - sel_muxr = 1'b0; - next_state = S15; 
- end - S15: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S16; - end - S16: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S17; - end - S17: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S18; - end - S18: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S19; - end - S19: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S20; - end - S20: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S21; - end - S21: // iteration 3 - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S22; - end - S22: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 
1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S23; - end - S23: - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S24; - end - S24: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S25; - end - S25: // rem - begin - done = 1'b0; - divBusy = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - sel_muxa = 3'b011; - sel_muxb = 3'b110; - sel_muxr = 1'b1; - next_state = S26; - end - S26: // done - begin - done = 1'b1; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S27; - end // case: S26 - S27: // done - begin - done = 1'b0; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - default: - begin - done = 1'b0; - divBusy = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - endcase // case(current_state) - end // always @ (current_state or X) - -endmodule // fsm diff --git a/pipelined/src/fpu/fsm_fpdiv_pipe.sv b/pipelined/src/fpu/fsm_fpdiv_pipe.sv deleted file mode 100755 index 33d270b89..000000000 --- a/pipelined/src/fpu/fsm_fpdiv_pipe.sv +++ /dev/null @@ -1,1330 +0,0 @@ 
-/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 9/28/2021 -// -// Purpose: FSM for floating point divider/square root unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module fsm_fpdiv_pipe ( - input logic clk, - input logic reset, - input logic start, - input logic op_type, - input logic P, - output logic done, - output logic load_preload, - output logic load_rega, - output logic load_regb, - output logic load_regc, - output logic load_regd, - output logic load_regr, - output logic load_regs, - output logic load_regp, - output logic [2:0] sel_muxa, - output logic [2:0] sel_muxb, - output logic sel_muxr, - output logic divBusy - ); - - // div64 : S1-S14 (14 cycles) - // sqrt64 : S15-S35 (21 cycles) - // div32: S36-S47 (12 cycles) - // sqrt32 : S48-S64 (17 cycles) - typedef enum logic [6:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, - S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, - S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, - S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, - S60, S61, S62, S63, S64, S65, S66} statetype; - - statetype current_state, next_state; - - always @(posedge clk) - begin - if (reset == 1'b1) - current_state <= S0; - else - current_state <= next_state; - end - - always @(*) - begin - case(current_state) - S0: // iteration 0 - begin - if (start==1'b0) - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end // if (start==1'b0) - else - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b1; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S66; - end - end // case: S0 - S66: - begin - done = 1'b0; - 
divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S65; - end // if (start==1'b0) - S65: - begin - if (op_type==1'b0 & P==1'b0) - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S1; - end - else if (op_type==1'b0 & P==1'b1) - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S36; - end - else if (op_type==1'b1 & P==1'b0) - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S15; - end - else if (op_type==1'b1 & P==1'b1) - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S48; - end - else - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - end // case: S0 - // div64 - S1: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; 
- load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b001; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S2; - end // case: S1 - S2: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S3; - end - S3: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S4; - end - S4: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S5; - end - S5: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; // add - next_state = S6; - end - S6: // iteration 3 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S7; - end - S7: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 
3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S8; - end // case: S7 - S8: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S9; - end // case: S7 - S9: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S10; - end // case: S9 - S10: // rem - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b1; - next_state = S11; - end - S11: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b1; - next_state = S12; - end // case: S11 - S12: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S13; - end - S13: - begin - done = 1'b1; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S14; - end - S14: - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - 
load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - // sqrt64 - S15: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S16; - end - S16: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b001; - sel_muxb = 3'b100; - sel_muxr = 1'b0; - next_state = S17; - end - S17: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S18; - end - S18: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S19; - end - S19: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S20; - end - S20: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b100; - sel_muxb 
= 3'b010; - sel_muxr = 1'b0; - next_state = S21; - end - S21: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S22; - end - S22: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S23; - end // case: S18 - S23: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S24; - end - S24: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S25; - end - S25: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S26; - end - S26: // iteration 3 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S27; - end // case: S21 - S27: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - 
load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S28; - end - S28: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S29; - end - S29: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S30; - end // case: S23 - S30: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S31; - end - S31: // rem - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b110; - sel_muxr = 1'b1; - next_state = S32; - end // case: S25 - S32: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b110; - sel_muxr = 1'b1; - next_state = S33; - end - S33: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - 
sel_muxr = 1'b0; - next_state = S34; - end - S34: // done - begin - done = 1'b1; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S35; - end - S35: - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - // div32 - S36: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b001; - sel_muxb = 3'b001; - sel_muxr = 1'b0; - next_state = S37; - end // case: S1 - S37: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S38; - end - S38: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S39; - end - S39: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S40; - end - S40: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 
1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S41; - end - S41: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S42; - end - S42: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S43; - end // case: S9 - S43: // rem - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b1; - next_state = S44; - end - S44: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b1; - next_state = S45; - end // case: S11 - S45: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S46; - end - S46: // done - begin - done = 1'b1; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 
1'b0; - next_state = S47; - end - S47: - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - // sqrt32 - S48: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S49; - end - S49: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b001; - sel_muxb = 3'b100; - sel_muxr = 1'b0; - next_state = S50; - end - S50: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b010; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S51; - end - S51: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S52; - end - S52: // iteration 1 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S53; - end - S53: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - 
load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S54; - end - S54: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S55; - end - S55: // iteration 2 - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b1; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S56; - end // case: S18 - S56: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S57; - end - S57: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b1; - load_regb = 1'b0; - load_regc = 1'b1; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b100; - sel_muxb = 3'b010; - sel_muxr = 1'b0; - next_state = S58; - end - S58: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b1; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b011; - sel_muxr = 1'b0; - next_state = S59; - end - S59: // q,qm,qp - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b1; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S60; - end - S60: 
// rem - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b110; - sel_muxr = 1'b1; - next_state = S61; - end // case: S25 - S61: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b1; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b011; - sel_muxb = 3'b110; - sel_muxr = 1'b1; - next_state = S62; - end // case: S34 - S62: - begin - done = 1'b0; - divBusy = 1'b1; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S63; - end - S63: // done - begin - done = 1'b1; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b1; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S64; - end // case: S34 - S64: - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - default: - begin - done = 1'b0; - divBusy = 1'b0; - load_preload = 1'b0; - load_rega = 1'b0; - load_regb = 1'b0; - load_regc = 1'b0; - load_regd = 1'b0; - load_regr = 1'b0; - load_regs = 1'b0; - load_regp = 1'b0; - sel_muxa = 3'b000; - sel_muxb = 3'b000; - sel_muxr = 1'b0; - next_state = S0; - end - endcase // case(current_state) - end // always @ (current_state or X) - -endmodule // fsm diff --git a/pipelined/src/fpu/lzacorrection.sv 
b/pipelined/src/fpu/lzacorrection.sv deleted file mode 100644 index a7a8143eb..000000000 --- a/pipelined/src/fpu/lzacorrection.sv +++ /dev/null @@ -1,41 +0,0 @@ -`include "wally-config.vh" - -module lzacorrection( - input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction - input logic FmaOp, - input logic DivOp, - input logic DivResDenorm, - input logic [`NE+1:0] DivCalcExpM, - input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection - input logic KillProdM, // is the product set to zero - input logic SumZero, - output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction - output logic [`NE+1:0] CorrDivExp, - output logic [`NE+1:0] SumExp // exponent of the normalized sum -); - logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ:0] CorrQuotShifted; - logic ResDenorm; // is the result denormalized - logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction - - // LZA correction - assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2]; - assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1]; - // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; - // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = {LZAPlus2|(DivCalcExpM==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; - // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign CorrShifted = FmaOp ? 
{CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; - // Determine sum's exponent - // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}}; - // recalculate if the result is denormalized - assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; - - // the quotent is in the range [.5,2) if there is no early termination - // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign CorrDivExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExpM - {(`NE+1)'(0), ~LZAPlus2}; -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/lzd_denorm.sv b/pipelined/src/fpu/lzd_denorm.sv deleted file mode 100755 index a91b0920e..000000000 --- a/pipelined/src/fpu/lzd_denorm.sv +++ /dev/null @@ -1,171 +0,0 @@ -// module lz2 (P, V, B0, B1); - -// input B0; -// input B1; - -// output P; -// output V; - -// assign V = B0 | B1; -// assign P = B0 & ~B1; - -// endmodule // lz2 - -// Note: This module is not made out of two lz2's - why not? (MJS) - -// module lz4 (ZP, ZV, B0, B1, V0, V1); - -// input B0; -// input B1; -// input V0; -// input V1; - -// output [1:0] ZP; -// output ZV; - -// assign ZP[0] = V0 ? B0 : B1; -// assign ZP[1] = ~V0; -// assign ZV = V0 | V1; - -// endmodule // lz4 - -// // Note: This module is not made out of two lz4's - why not? 
(MJS) - -// module lz8 (ZP, ZV, B); - -// input [7:0] B; - -// wire s1p0; -// wire s1v0; -// wire s1p1; -// wire s1v1; -// wire s2p0; -// wire s2v0; -// wire s2p1; -// wire s2v1; -// wire [1:0] ZPa; -// wire [1:0] ZPb; -// wire ZVa; -// wire ZVb; - -// output [2:0] ZP; -// output ZV; - -// lz2 l1(s1p0, s1v0, B[2], B[3]); -// lz2 l2(s1p1, s1v1, B[0], B[1]); -// lz4 l3(ZPa, ZVa, s1p0, s1p1, s1v0, s1v1); - -// lz2 l4(s2p0, s2v0, B[6], B[7]); -// lz2 l5(s2p1, s2v1, B[4], B[5]); -// lz4 l6(ZPb, ZVb, s2p0, s2p1, s2v0, s2v1); - -// assign ZP[1:0] = ZVb ? ZPb : ZPa; -// assign ZP[2] = ~ZVb; -// assign ZV = ZVa | ZVb; - -// endmodule // lz8 - -// module lz16 (ZP, ZV, B); - -// input [15:0] B; - -// wire [2:0] ZPa; -// wire [2:0] ZPb; -// wire ZVa; -// wire ZVb; - -// output [3:0] ZP; -// output ZV; - -// lz8 l1(ZPa, ZVa, B[7:0]); -// lz8 l2(ZPb, ZVb, B[15:8]); - -// assign ZP[2:0] = ZVb ? ZPb : ZPa; -// assign ZP[3] = ~ZVb; -// assign ZV = ZVa | ZVb; - -// endmodule // lz16 - -// module lz32 (ZP, ZV, B); - -// input [31:0] B; - -// wire [3:0] ZPa; -// wire [3:0] ZPb; -// wire ZVa; -// wire ZVb; - -// output [4:0] ZP; -// output ZV; - -// lz16 l1(ZPa, ZVa, B[15:0]); -// lz16 l2(ZPb, ZVb, B[31:16]); - -// assign ZP[3:0] = ZVb ? ZPb : ZPa; -// assign ZP[4] = ~ZVb; -// assign ZV = ZVa | ZVb; - -// endmodule // lz32 - -// // This module returns the number of leading zeros ZP in the 64-bit -// // number B. If there are no ones in B, then ZP and ZV are both 0. - -// module lz64 (ZP, ZV, B); - -// input [63:0] B; - -// wire [4:0] ZPa; -// wire [4:0] ZPb; -// wire ZVa; -// wire ZVb; - -// output [5:0] ZP; -// output ZV; - -// lz32 l1(ZPa, ZVa, B[31:0]); -// lz32 l2(ZPb, ZVb, B[63:32]); - -// assign ZV = ZVa | ZVb; -// assign ZP[4:0] = (ZVb ? ZPb : ZPa) & {5{ZV}}; -// assign ZP[5] = ~ZVb & ZV; - -// endmodule // lz64 - -// This module returns the number of leading zeros ZP in the 52-bit -// number B. If there are no ones in B, then ZP and ZV are both 0. 
- -module lz52 (ZP, ZV, B); - - input [51:0] B; - - wire [4:0] ZP_32; - wire [3:0] ZP_16; - wire [1:0] ZP_4; - wire ZV_32; - wire ZV_16; - wire ZV_4; - - wire ZP_2_1; - wire ZP_2_2; - wire ZV_2_1; - wire ZV_2_2; - - output [5:0] ZP; - output ZV; - - lz32 l1 (ZP_32, ZV_32, B[51:20]); - lz16 l2 (ZP_16, ZV_16, B[19:4]); - - lz2 l3_1 (ZP_2_1, ZV_2_1, B[3], B[2]); - lz2 l3_2 (ZP_2_2, ZV_2_2, B[1], B[0]); - lz4 l3_final (ZP_4, ZV_4, ZP_2_1, ZP_2_2, ZV_2_1, ZV_2_2); - - assign ZV = ZV_32 | ZV_16 | ZV_4; - assign ZP[5] = ~ZV_32; - assign ZP[4] = ZV_32 ? ZP_32[4] : ~ZV_16; - assign ZP[3:2] = ZV_32 ? ZP_32[3:2] : (ZV_16 ? ZP_16[3:2] : 2'b0); - assign ZP[1:0] = ZV_32 ? ZP_32[1:0] : (ZV_16 ? ZP_16[1:0] : ZP_4); - -endmodule // lz52 - - diff --git a/pipelined/src/fpu/sbtm_sqrt.sv b/pipelined/src/fpu/negateintres.sv similarity index 61% rename from pipelined/src/fpu/sbtm_sqrt.sv rename to pipelined/src/fpu/negateintres.sv index bc15a82f2..dde515b94 100644 --- a/pipelined/src/fpu/sbtm_sqrt.sv +++ b/pipelined/src/fpu/negateintres.sv @@ -1,9 +1,9 @@ /////////////////////////////////////////// // -// Written: James Stine -// Modified: 8/1/2018 +// Written: me@KatherineParry.com +// Modified: 7/5/2022 // -// Purpose: Bipartite Lookup for sqrt part of fpdivsqrt +// Purpose: Negate integer result // // A component of the Wally configurable RISC-V project. // @@ -26,43 +26,23 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. 
//////////////////////////////////////////////////////////////////////////////////////////////// +`include "wally-config.vh" -module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y); +module negateintres( + input logic Xs, + input logic [`NORMSHIFTSZ-1:0] Shifted, + input logic Signed, + input logic Int64, + input logic Plus1, + output logic [1:0] CvtNegResMsbs, + output logic [`XLEN+1:0] CvtNegRes +); - // bit partitions - logic [4:0] x0; - logic [2:0] x1; - logic [3:0] x2; - logic [2:0] x2_1cmp; - // mem outputs - logic [13:0] y0; - logic [5:0] y1; - // input to CPA - logic [14:0] op1; - logic [14:0] op2; - logic [14:0] p; - logic cout; - - assign x0 = a[11:7]; - assign x1 = a[6:4]; - assign x2 = a[3:0]; - - sbtm_a2 mem1 ({x0, x1}, y0); - assign op1 = {y0, 1'b0}; - - // 1s cmp per sbtm/stam - assign x2_1cmp = x2[3] ? ~x2[2:0] : x2[2:0]; - sbtm_a3 mem2 ({x0, x2_1cmp}, y1); - // 1s cmp per sbtm/stam - assign op2 = x2[3] ? {{8{1'b1}}, ~y1, 1'b1} : - {8'b0, y1, 1'b1}; - - // CPA - assign {cout, p} = op1 + op2; - assign y = p[14:4]; - -endmodule // sbtm2 - - - + + // round and negate the positive res if needed + assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; + + assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] : + Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32]; +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/normshift.sv b/pipelined/src/fpu/normshift.sv index aa6942281..f382eed37 100644 --- a/pipelined/src/fpu/normshift.sv +++ b/pipelined/src/fpu/normshift.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: normalization shifter +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv new file mode 100644 index 000000000..8d11273a2 --- /dev/null +++ b/pipelined/src/fpu/otfc.sv @@ -0,0 +1,112 @@ +/////////////////////////////////////////// +// otfc.sv +// +// Written: me@KatherineParry.com, cturek@hmc.edu +// Modified:7/14/2022 +// +// Purpose: On the fly conversion +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module otfc2 ( + input logic qp, qz, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // Use this otfc for division only. 
+ logic [`QLEN-2:0] QR, QMR; + + assign QR = Q[`QLEN-2:0]; + assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM + + always_comb begin + if (qp) begin + QNext = {QR, 1'b1}; + QMNext = {QR, 1'b0}; + end else if (qz) begin + QNext = {QR, 1'b0}; + QMNext = {QMR, 1'b1}; + end else begin // If qp and qz are not true, then qn is + QNext = {QMR, 1'b1}; + QMNext = {QMR, 1'b0}; + end + end + +endmodule + + +module otfc4 ( + input logic [3:0] q, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // + // This code follows the psuedocode presented in the + // floating point chapter of the book. Right now, + // it is written for Radix-4 division. + // + // QM is Q-1. It allows us to write negative bits + // without using a costly CPA. + + // QR and QMR are the shifted versions of Q and QM. + // They are treated as [N-1:r] size signals, and + // discard the r most significant bits of Q and QM. 
+ logic [`QLEN-3:0] QR, QMR; + + // shift Q (quotent) and QM (quotent-1) + // if q = 2 Q = {Q, 10} QM = {Q, 01} + // else if q = 1 Q = {Q, 01} QM = {Q, 00} + // else if q = 0 Q = {Q, 00} QM = {QM, 11} + // else if q = -1 Q = {QM, 11} QM = {QM, 10} + // else if q = -2 Q = {QM, 10} QM = {QM, 01} + + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM + always_comb begin + if (q[3]) begin // +2 + QNext = {QR, 2'b10}; + QMNext = {QR, 2'b01}; + end else if (q[2]) begin // +1 + QNext = {QR, 2'b01}; + QMNext = {QR, 2'b00}; + end else if (q[1]) begin // -1 + QNext = {QMR, 2'b11}; + QMNext = {QMR, 2'b10}; + end else if (q[0]) begin // -2 + QNext = {QMR, 2'b10}; + QMNext = {QMR, 2'b01}; + end else begin // 0 + QNext = {QR, 2'b00}; + QMNext = {QMR, 2'b11}; + end + end + // Final Quoteint is in the range [.5, 2) + +endmodule diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 20cea2b61..f9ccd2553 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -1,9 +1,9 @@ /////////////////////////////////////////// // -// Written: Katherine Parry, David Harris -// Modified: 6/23/2021 +// Written: me@KatherineParry.com +// Modified: 7/5/2022 // -// Purpose: Floating point multiply-accumulate of configurable size +// Purpose: Post-Processing // // A component of the Wally configurable RISC-V project. 
// @@ -29,93 +29,92 @@ `include "wally-config.vh" -module postprocess( +module postprocess ( // general signals - input logic XSgnM, YSgnM, // input signs - input logic [`NE-1:0] ZExpM, // input exponents - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single - input logic [2:0] FOpCtrlM, // choose which opperation (look below for values) - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs - input logic ZDenormM, // is the original precision denormalized - input logic [1:0] PostProcSelM, // select result to be written to fp register + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Ze, // input exponents + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic [2:0] Frm, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [2:0] FOpCtrl, // choose which operation (look below for values) + input logic XZero, YZero, ZZero, // inputs are zero + input logic XInf, YInf, ZInf, // inputs are infinity + input logic XNaN, YNaN, ZNaN, // inputs are NaN + input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs + input logic ZDenorm, // is the original precision denormalized + input logic [1:0] PostProcSel, // select result to be written to fp register //fma signals - input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic 
KillProdM, // set the product to zero before addition if the product is too small to matter - input logic [3*`NF+5:0] SumM, // the positive sum - input logic NegSumM, // was the sum negitive - input logic InvZM, // do you invert Z - input logic ZSgnEffM, // the modified Z sign - depends on instruction - input logic PSgnM, // the product's sign - input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count + input logic FmaAs, // the modified Z sign - depends on instruction + input logic FmaPs, // the product's sign + input logic [`NE+1:0] FmaPe, // Product exponent + input logic [3*`NF+5:0] FmaSm, // the positive sum + input logic FmaZmS, // sticky bit that is calculated during alignment + input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter + input logic FmaNegSum, // was the sum negative + input logic FmaInvA, // do you invert Z + input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals - input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, - input logic [`NE+1:0] DivCalcExpM, // the calculated expoent - input logic DivStickyM, - input logic DivNegStickyM, - input logic [`DIVLEN+2:0] Quot, + input logic [`DURLEN-1:0] DivEarlyTermShift, + input logic DivS, + input logic DivDone, + input logic [`NE+1:0] DivQe, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, // conversion signals - input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic CvtResDenormUfM, - input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by - input logic CvtResSgnM, // the result's sign - input logic FWriteIntM, // is fp->int (since it's writting to the integer register) - input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) - input logic IntZeroM, // is the input zero + input logic CvtCs, // the result's sign + input logic [`NE:0] CvtCe, // the calculated exponent + input logic CvtResDenormUf, + input logic 
[`LOGCVTLEN-1:0] CvtShiftAmt, // how much to shift by + input logic ToInt, // is fp->int (since it's writing to the integer register) + input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) + input logic IntZero, // is the input zero // final results - output logic [`FLEN-1:0] PostProcResM, // FMA final result - output logic [4:0] PostProcFlgM, - output logic [`XLEN-1:0] FCvtIntResM // the int conversion result + output logic [`FLEN-1:0] PostProcRes, // final floating-point result (operation selected by PostProcSel) + output logic [4:0] PostProcFlg, + output logic [`XLEN-1:0] FCvtIntRes // the int conversion result ); // general signals - logic [`NF-1:0] ResFrac; // Result fraction - logic [`NE-1:0] ResExp; // Result exponent - logic [`CORRSHIFTSZ-1:0] CorrShifted; // corectly shifted fraction - logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow - logic Sticky; // Sticky bit + logic Ws; // result sign (was ResSgn) + logic [`NF-1:0] Rf; // Result fraction + logic [`NE-1:0] Re; // Result exponent + logic Ms; // sign passed to rounding (was RoundSgn) + logic [`NE+1:0] Me; // exponent output by round and used by flags (was RoundExp) + logic [`CORRSHIFTSZ-1:0] Mf; // correctly shifted fraction + logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow + logic S; // Sticky bit logic UfPlus1; // do you add one (for determining underflow flag) - logic Round; // bits needed to determine rounding - logic [`FLEN:0] RoundAdd; // how much to add to the result + logic R; // bits needed to determine rounding logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? 
logic IntInvalid, Overflow, Invalid; // flags - logic [`NE+1:0] RoundExp; - logic ResSgn; - logic RoundSgn; - logic UfLSBRes; + logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals - logic [`NE+1:0] SumExp; // exponent of the normalized sum - logic SumZero; // is the sum zero - logic [3*`NF+8:0] FmaShiftIn; // is the sum zero - logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results - logic PreResultDenorm; // is the result denormalized - calculated before LZA corection + logic [`NE+1:0] FmaSe; // exponent of the normalized sum + logic FmaSZero; // is the sum zero + logic [3*`NF+8:0] FmaShiftIn; // shift input + logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results + logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA correction logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; - logic [`NE+1:0] CorrDivExp; + logic [`NE+1:0] Qe; logic DivByZero; logic DivResDenorm; logic [`NE+1:0] DivDenormShift; // conversion signals logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted - logic [1:0] NegResMSBS; + logic [1:0] CvtNegResMsbs; + logic [`XLEN+1:0] CvtNegRes; logic CvtResUf; // readability signals logic Mult; // multiply opperation logic Int64; // is the integer 64 bits? logic Signed; // is the opperation with a signed integer? logic IntToFp; // is the opperation an int->fp conversion? - logic ToInt; // is the opperation an fp->int conversion? 
logic CvtOp; logic FmaOp; logic DivOp; @@ -124,51 +123,55 @@ module postprocess( logic Sqrt; // signals to help readability - assign Signed = FOpCtrlM[0]; - assign Int64 = FOpCtrlM[1]; - assign IntToFp = FOpCtrlM[2]; - assign ToInt = FWriteIntM; - assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0]; - assign CvtOp = (PostProcSelM == 2'b00); - assign FmaOp = (PostProcSelM == 2'b10); - assign DivOp = (PostProcSelM == 2'b01); - assign Sqrt = FOpCtrlM[0]; + assign Signed = FOpCtrl[0]; + assign Int64 = FOpCtrl[1]; + assign IntToFp = FOpCtrl[2]; + assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; + assign CvtOp = (PostProcSel == 2'b00); + assign FmaOp = (PostProcSel == 2'b10); + assign DivOp = (PostProcSel == 2'b01)&DivDone; // divide is only treated as active once DivDone is set + assign Sqrt = FOpCtrl[0]; // is there an input of infinity or NaN being used - assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp); - assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp); + assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); + assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output - // - otherwise: FmtM contains the percision of the output + // - otherwise: Fmt contains the precision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT); + assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0]; + assign OutFmt = IntToFp|~CvtOp ? 
Fmt : FOpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM, - .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, - .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, + .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, + .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb - case(PostProcSelM) + case(PostProcSel) 2'b10: begin // fma ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt}; ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}}; end 2'b00: begin // cvt - ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM}; + ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt}; ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end - 2'b01: begin //div ***prob can take out - ShiftAmt = DivShiftAmt; - ShiftIn = DivShiftIn; + 2'b01: begin //div + if(DivDone) begin + ShiftAmt = DivShiftAmt; + ShiftIn = DivShiftIn; + end else begin + ShiftAmt = '0; + ShiftIn = '0; + end end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -178,9 +181,9 @@ module postprocess( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp, - .DivResDenorm, 
.DivDenormShift, .DivOp, .DivCalcExpM, - .CorrDivExp, .SumZero, .Shifted, .SumExp, .CorrShifted); + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, + .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -192,37 +195,41 @@ module postprocess( // round to infinity // round to nearest max magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, - .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, - .DivStickyM, .DivNegStickyM, - .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); + + roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, + .Xs, .Ys, .CvtCs, .Ms); + + round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .DivS, .DivDone, + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky, - .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, - .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn); + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags /////////////////////////////////////////////////////////////////////////////// - flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM, - .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM, - .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, 
.PSgnM, .Round, .IntInvalid, .DivByZero, - .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, - .RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM); + flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, + .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, + .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, + .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// // Select the result /////////////////////////////////////////////////////////////////////////////// - resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid, - .IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf, - .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS, - .XInfM, .YInfM, .DivOp, - .DivByZero, .FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM); + negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); + specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, + .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, + .XInf, .YInf, .DivOp, + .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv new file mode 100644 index 000000000..396ca7761 --- /dev/null +++ b/pipelined/src/fpu/qsel.sv @@ -0,0 +1,135 @@ +/////////////////////////////////////////// +// qsel.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Quotient digit selection logic (qsel2: digits +1/0/-1, 
qsel4: 4-bit digit code q) +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module qsel2 ( // *** eventually just change to 4 bits + input logic [`DIVLEN+3:`DIVLEN] ps, pc, + output logic qp, qz//, qn +); + + logic [`DIVLEN+3:`DIVLEN] p, g; + logic magnitude, sign, cout; + + // The quotient selection logic is presented for simplicity, not + // for efficiency. You can probably optimize your logic to + // select the proper divisor with less delay. 
+ + // Quotient equations from EE371 lecture notes 13-20; p and g are the bitwise XOR and AND of ps and pc + assign p = ps ^ pc; + assign g = ps & pc; + + assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign sign = p[`DIVLEN+3] ^ cout; +/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & + (ps[52]^pc[52])); + assign #1 sign = (ps[55]^pc[55])^ + (ps[54] & pc[54] | ((ps[54]^pc[54]) & + (ps[53]&pc[53] | ((ps[53]^pc[53]) & + (ps[52]&pc[52]))))); */ + + // Produce quotient digit: qp = +1, qz = 0, neither set = -1 (qn is not driven) + assign qp = magnitude & ~sign; + assign qz = ~magnitude; +// assign #1 qn = magnitude & sign; +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; // sum of the top 8 bits of WC and WS + assign Wmsbs = PreWmsbs[7:1]; // keep the upper 7 bits of that sum + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... 
+ // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; // lookup table read with the 10-bit index {Dmsbs, Wmsbs} + + always_comb begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; // same value as the concatenation {d[2:0], w[6:0]} + w2 = w-128*(w>=64); // reinterpret the 7-bit value w as signed two's complement, range [-64, 63] + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end + assign q = QSel4[{Dmsbs,Wmsbs}]; // q is one-hot or all zeros; NOTE(review): presumably the +2/+1/-1/-2/0 code decoded by otfc4 - confirm + +endmodule diff --git 
a/pipelined/src/fpu/redundantmul.sv b/pipelined/src/fpu/redundantmul.sv deleted file mode 100644 index c1bd8f25a..000000000 --- a/pipelined/src/fpu/redundantmul.sv +++ /dev/null @@ -1,58 +0,0 @@ -/////////////////////////////////////////// -// redundantmul.sv -// 
-// Written: David_Harris@hmc.edu and ssanghai@hm.edu 10/11/2021 -// Modified: -// -// Purpose: multiplier with output in redundant carry-sum form -// This can be faster than a mutiplier that requires a final adder to obtain the nonredundant answer. -// The module has several implementations controlled by the DESIGN_COMPILER flag. -// When DESIGN_COMPILER = 1, use the Synopsys DesignWare DW02_multp block. This will give highest quality results -// but doesn't work in simulation or when using different tools -// When DESIGN_COMPILER = 2, use the Wally mult_cs block with Radix 2 Booth encoding and a Wallace Tree -// This simulates and synthesizes, but quality of results ae lower than DesignWare -// Otherwise, just use a nonredundant multiplier and set one word to 0. This is best for FPGAs, which have -// block multipliers, and also simulates fastest. -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module redundantmul #(parameter WIDTH =8)( - input logic [WIDTH-1:0] a,b, - output logic [2*WIDTH-1:0] out0, out1); - - if (`DESIGN_COMPILER == 1) begin:mul - logic [2*WIDTH-1+2:0] tmp_out0; - logic [2*WIDTH-1+2:0] tmp_out1; - - DW02_multp #(WIDTH, WIDTH, 2*WIDTH+2) mul(.a, .b, .tc(1'b0), .out0(tmp_out0), .out1(tmp_out1)); - assign out0 = tmp_out0[2*WIDTH-1:0]; - assign out1 = tmp_out1[2*WIDTH-1:0]; - end else begin:mul // force a nonredunant multipler. This will simulate properly and also is appropriate for FPGAs. - assign out0 = a * b; - assign out1 = 0; - end -endmodule - - diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv deleted file mode 100644 index 50ef1b6be..000000000 --- a/pipelined/src/fpu/resultselect.sv +++ /dev/null @@ -1,274 +0,0 @@ -`include "wally-config.vh" - -module resultselect( - input logic XSgnM, // input signs - input logic [`NE-1:0] ZExpM, // input exponents - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, - input logic XInfM, YInfM, - input logic XZeroM, ZZeroM, - input logic IntZeroM, - input logic NaNIn, - input logic IntToFp, - input logic Int64, - input logic Signed, - input logic CvtOp, - input logic DivOp, - input logic FmaOp, - input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero - 
input logic Plus1, - input logic DivByZero, - input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic ZDenormM, // is the original precision denormalized - input logic ResSgn, // the res's sign - input logic [`FLEN:0] RoundAdd, // how much to add to the res - input logic IntInvalid, Invalid, Overflow, // flags - input logic CvtResUf, - input logic [`NE-1:0] ResExp, // Res exponent - input logic [`NE+1:0] FullResExp, // Res exponent - input logic [`NF-1:0] ResFrac, // Res fraction - output logic [`FLEN-1:0] PostProcResM, // final res - output logic [1:0] NegResMSBS, - output logic [`XLEN-1:0] FCvtIntResM // final res -); - logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results - logic OfResMax; - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output - logic [`XLEN+1:0] NegRes; // the negation of the result - logic KillRes; - logic SelOfRes; - - - // does the overflow result output the maximum normalized floating point number - // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn)); - - if (`FPSIZES == 1) begin - - //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - assign OfRes = OfResMax ? 
{ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)}; - assign NormRes = {ResSgn, ResExp, ResFrac}; - - end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - - assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - assign NormRes = OutFmt ? 
{ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]}; - - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {ResSgn, ResExp, ResFrac}; - end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - OfRes = OfResMax ? 
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]}; - end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]}; - end - default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); - end else begin - InvalidRes = (`FLEN)'(0); - end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); - end - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? 
{ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {ResSgn, ResExp, ResFrac}; - end - 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]}; - end - 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end - - OfRes = OfResMax ? 
{{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]}; - end - 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; - // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]}; - end - endcase - - end - - - - - - // determine if you shoould kill the res - Cvt - // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 - // - dont set to zero if fp input is zero but not using the fp input - // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1); - assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp)); - // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes : - YNaNM&~CvtOp ? YNaNRes : - ZNaNM&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? 
UfRes : - NormRes; - end else begin - assign PostProcResM = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end - - /////////////////////////////////////////////////////////////////////////////////////// - // - // ||||||||||| ||| ||| ||||||||||||| - // ||| |||||| ||| ||| - // ||| ||| ||| ||| ||| - // ||| ||| |||||| ||| - // ||||||||||| ||| ||| ||| - // - /////////////////////////////////////////////////////////////////////////////////////// - - // *** probably can optimize the negation - // select the overflow integer res - // - negitive infinity and out of range negitive input - // | int | long | - // signed | -2^31 | -2^63 | - // unsigned | 0 | 0 | - // - // - positive infinity and out of range negitive input and NaNs - // | int | long | - // signed | 2^31-1 | 2^63-1 | - // unsigned | 2^32-1 | 2^64-1 | - // - // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive - - // round and negate the positive res if needed - assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; - - //*** false critical path probably - assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] : - Int64 ? 
NegRes[`XLEN+1:`XLEN] : NegRes[33:32]; - - // select the integer output - // - if the input is invalid (out of bounds NaN or Inf) then output overflow res - // - if the input underflows - // - if rounding and signed opperation and negitive input, output -1 - // - otherwise output a rounded 0 - // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntResM = IntInvalid ? OfIntRes : - CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]}; -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index 9a76cf8f3..e1ea5e410 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -1,55 +1,63 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: calculating the result's sign +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" module resultsign( - input logic [2:0] FrmM, - input logic PSgnM, ZSgnEffM, - input logic InvZM, - input logic XSgnM, - input logic YSgnM, - input logic ZInfM, + input logic [2:0] Frm, + input logic FmaPs, FmaAs, + input logic ZInf, input logic InfIn, - input logic NegSumM, input logic FmaOp, - input logic DivOp, - input logic CvtOp, - input logic [`NE+1:0] SumExp, - input logic SumZero, + input logic [`NE+1:0] FmaSe, + input logic FmaSZero, input logic Mult, - input logic Round, - input logic Sticky, - input logic CvtResSgnM, - output logic RoundSgn, - output logic ResSgn + input logic R, + input logic S, + input logic Ms, + output logic Ws ); - logic ZeroSgn; - logic InfSgn; - logic FmaResSgn; - logic FmaResSgnTmp; - logic Underflow; - logic DivSgn; - // logic ResultSgnTmp; + logic Zeros; + logic Infs; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)); - assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM; + assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? 
Frm[1:0] == 2'b10 : FmaPs; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM); - assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; - assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp; - - assign DivSgn = XSgnM^YSgnM; - - // Sign for rounding calulation - assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); - assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); + assign Infs = ZInf ? FmaAs : FmaPs; + assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 532e17290..6132dba4a 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -1,3 +1,31 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Rounder +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" // what position is XLEN in? // options: @@ -9,51 +37,48 @@ module round( input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single - input logic [2:0] FrmM, // rounding mode + input logic [2:0] Frm, // rounding mode input logic FmaOp, input logic DivOp, input logic CvtOp, input logic ToInt, - input logic [1:0] PostProcSelM, - input logic CvtResDenormUfM, + input logic DivDone, + input logic [1:0] PostProcSel, + input logic CvtResDenormUf, input logic CvtResUf, - input logic [`CORRSHIFTSZ-1:0] CorrShifted, - input logic AddendStickyM, // addend's sticky bit - input logic ZZeroM, // is Z zero - input logic InvZM, // invert Z - input logic [`NE+1:0] SumExp, // exponent of the normalized sum - input logic RoundSgn, // the result's sign - input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic [`NE+1:0] CorrDivExp, // the calculated expoent - input logic DivStickyM, // sticky bit - input logic DivNegStickyM, + input logic [`CORRSHIFTSZ-1:0] Mf, + input logic FmaZmS, // addend's sticky bit + input logic [`NE+1:0] FmaSe, // exponent of the normalized sum + input logic Ms, // the result's sign + input logic [`NE:0] CvtCe, // the calculated expoent + input logic [`NE+1:0] Qe, // the calculated expoent + input logic DivS, // sticky bit output logic UfPlus1, // do you add or subtract on from the result - output logic [`NE+1:0] FullResExp, // 
ResExp with bits to determine sign and overflow - output logic [`NF-1:0] ResFrac, // Result fraction - output logic [`NE-1:0] ResExp, // Result exponent - output logic Sticky, // sticky bit - output logic [`NE+1:0] RoundExp, + output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow + output logic [`NF-1:0] Rf, // Result fraction + output logic [`NE-1:0] Re, // Result exponent + output logic S, // sticky bit + output logic [`NE+1:0] Me, output logic Plus1, - output logic [`FLEN:0] RoundAdd, // how much to add to the result - output logic Round, UfLSBRes // bits needed to calculate rounding + output logic R, UfL // bits needed to calculate rounding ); - logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number - logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result - logic NormSumSticky; // normalized sum's sticky bit - logic UfSticky; // sticky bit for underlow calculation + logic L; // bit used for rounding - least significant bit of the normalized sum + logic UfCalcPlus1; + logic NormS; // normalized sum's sticky bit + logic UfS; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic UfRound; + logic UfR; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; + logic [`FLEN:0] RoundAdd; // how much to add to the result /////////////////////////////////////////////////////////////////////////////// // Rounding /////////////////////////////////////////////////////////////////////////////// // round to nearest even - // {Round, Sticky} + // {R, S} // 0x - do nothing // 10 - tie - Plus1 if result is odd (LSBNormSum = 1) // - don't add 1 if a small number was supposed to be subtracted @@ -71,7 +96,7 @@ module round( // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 // round to 
nearest max magnitude - // {Guard, Round, Sticky} + // {Guard, R, S} // 0x - do nothing // 10 - tie - Plus1 // - don't add 1 if a small number was supposed to be subtracted @@ -89,61 +114,61 @@ module round( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - 
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = 
(|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + 
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); end @@ -151,37 +176,37 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp | DivStickyM&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1]; - assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF]; - assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1]; - assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1]; - assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2]; + assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpUfRound = OutFmt ? 
Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF1]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF2]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin FpRound = 1'bx; @@ -193,130 +218,97 @@ module round( always_comb case (OutFmt) 2'h3: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1]; - FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF]; - FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2]; + FpRound = 
Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound; - assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes; - assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; + assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound; + assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes; + assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; // used to determine underflow flag - assign UfLSBRes = FpRound; + assign UfL = FpRound; // determine sticky - assign Sticky = UfSticky | UfRound; - - - // Deterimine if a small number was supposed to be subtrated - // - for FMA or if division has a negitive sticky bit - assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound); - assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky; + assign S = UfS | UfR; always_comb begin // Determine if you add 1 - case (FrmM) - 3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even + case (Frm) + 3'b000: CalcPlus1 = R & (S| L);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = RoundSgn & ~(SubBySmallNum & ~Round);//round down - 3'b011: CalcPlus1 = ~RoundSgn & ~(SubBySmallNum & ~Round);//round up - 3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude + 3'b010: CalcPlus1 = Ms;//round down + 3'b011: CalcPlus1 = ~Ms;//round up + 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) - case (FrmM) - 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even + case (Frm) + 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 
= RoundSgn & ~(UfSubBySmallNum & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~RoundSgn & ~(UfSubBySmallNum & ~UfRound);//round up - 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Ms;//round down + 3'b011: UfCalcPlus1 = ~Ms;//round up + 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase - // Determine if you subtract 1 - case (FrmM) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero - 3'b010: CalcMinus1 = ~RoundSgn & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = RoundSgn & ~Round & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Round); + assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky - assign Minus1 = CalcMinus1 & (Sticky | Round); + assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky // Compute rounded result if (`FPSIZES == 1) begin - assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1}; + assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; end else if (`FPSIZES == 2) begin // \/FLEN+1 // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} : - Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; end else if (`FPSIZES == 3) begin - always_comb begin - case (OutFmt) - `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = Minus1 ? 
{{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; - default: RoundAdd = (`FLEN+1)'(0); - endcase - end + assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; - end else if (`FPSIZES == 4) begin - always_comb begin - case (OutFmt) - 2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - 2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; - endcase - end - - end + end else if (`FPSIZES == 4) + assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned - assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; always_comb - case(PostProcSelM) - 2'b10: RoundExp = SumExp; // fma - 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = CorrDivExp; // divide - default: RoundExp = 0; + case(PostProcSel) + 2'b10: Me = FmaSe; // fma + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt + 2'b01: Me = DivDone ? 
Qe : '0; // divide + default: Me = '0; endcase // round the result // - if the fraction overflows one should be added to the exponent - assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd; - assign ResExp = FullResExp[`NE-1:0]; + assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; + assign Re = FullRe[`NE-1:0]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/rounder_denorm.sv b/pipelined/src/fpu/rounder_denorm.sv deleted file mode 100755 index 3c9a0e91f..000000000 --- a/pipelined/src/fpu/rounder_denorm.sv +++ /dev/null @@ -1,259 +0,0 @@ -// The rounder takes as input logics a 64-bit value to be rounded, A, the -// exponent of the value to be rounded, the sign of the final result, Sign, -// the precision of the results, P, and the two-bit rounding mode, rm. -// It produces a rounded 52-bit result, Z, the exponent of the rounded -// result, Z_exp, and a flag that indicates if the result was rounded, -// Inexact. The rounding mode has the following values. -// rm Modee -// 00 round-to-nearest-even -// 01 round-toward-zero -// 10 round-toward-plus infinity -// 11 round-toward-minus infinity -// The rounding algorithm determines if '1' should be added to the -// truncated signficant result, based on three significant bits -// (least (L), round (R) and sticky (S)), the rounding mode (rm) -// and the sign of the final result (Sign). Visually, L and R appear as -// xxxxxL,Rxxxxxxx -// where , denotes the rounding boundary. S is the logical OR of all the -// bits to the right of R. 
- -module rounder ( - input logic [2:0] rm, - input logic P, - input logic OvEn, - input logic UnEn, - input logic exp_valid, - input logic [3:0] sel_inv, - input logic Invalid, - input logic DenormIn, - input logic Asign, - input logic [10:0] Aexp, - input logic [5:0] norm_shift, - input logic [63:0] A, - input logic [10:0] exponent_postsum, - input logic A_Norm, - input logic B_Norm, - input logic [11:0] exp_A_unmodified, - input logic [11:0] exp_B_unmodified, - input logic normal_overflow, - input logic normal_underflow, - input logic swap, - input logic [2:0] op_type, - input logic [63:0] sum, - - output logic [63:0] Result, - output logic DenormIO, - output logic [4:0] Flags -); - - wire Rsign; - wire Sticky_out; - wire [51:0] ShiftMant; - wire [63:0] ShiftMant_64; - wire [10:0] Rexp; - wire [10:0] Rexp_denorm; - wire [11:0] Texp; //Parallelized for denorm exponent - wire [11:0] Texp_addone; //results - wire [11:0] Texp_subone; - wire [51:0] Rmant; - wire [51:0] Tmant; - wire Rzero; - wire VSS = 1'b0; - wire VDD = 1'b1; - wire [51:0] B; // Value used to add the "ones" - wire [11:0] B_12_overflow; // Value used to add one to exponent - wire [11:0] B_12_underflow; // Value used to subtract one from exponent - wire S_SP; // Single precision sticky bit - wire S_DP; // Double precision sticky bit - wire S; // Actual sticky bit - wire R; // Round bit - wire L; // Least significant bit - wire add_one; // '1' if one should be added - wire UnFlow_SP, UnFlow_DP, UnderFlow; - wire OvFlow_SP, OvFlow_DP, OverFlow; - wire Inexact; - wire Round_zero; - wire Infinite; - wire VeryLarge; - wire Largest; - wire Adj_exp; - wire Valid; - wire NaN; - wire Cout; - wire Cout_overflow; - wire Texp_l7z; - wire Texp_l7o; - - // Determine the sticky bits for double and single precision - assign S_DP= A[9]|A[8]|A[7]|A[6]|A[5]|A[4]|A[3]|A[2]|A[1]|A[0]; - assign S_SP = S_DP |A[38]|A[37]|A[36]|A[35]|A[34]|A[33]|A[32]|A[31]|A[30]| - A[29]|A[28]|A[27]|A[26]|A[25]|A[24]|A[23]|A[22]|A[21]|A[20]| 
- A[19]|A[18]|A[17]|A[16]|A[15]|A[14]|A[13]|A[12]|A[11]|A[10]; - - // Set the least (L), round (R), and sticky (S) bits based on - // the precision. - assign {L, R, S} = P ? {A[40],A[39],S_SP} : {A[11],A[10],S_DP}; - - // Add one if ((the rounding mode is round-to-nearest) and (R is one) and - // (S or L is one)) or ((the rounding mode is towards plus or minus - // infinity (rm[1] = 1)) and (the sign and rm[0] are the same) and - // (R or S is one)). - - assign add_one = ~rm[2] & ((~rm[1]&~rm[0]&R&(L|S)) | (rm[1]&(Asign^~rm[0])&(R|S))) | (rm[2] & R); - - // Add one using a 52-bit adder. The one is added to the LSB B[0] for - // double precision or to B[29] for single precision. - // This could be simplified by using a specialized adder. - // The current adder is actually 64-bits. The leading one - // for normalized results in not included in the addition. - assign B = {{22{VSS}}, add_one&P, {28{VSS}}, add_one&~P}; - assign B_12_overflow = {8'h0, 3'b0, normal_overflow}; - assign B_12_underflow = {8'h0, 3'b0, normal_underflow}; - - cla52 add1(Tmant, Cout, A[62:11], B); //***adder - - cla12 add1_exp(Texp_addone, Cout_overflow, Texp, B_12_overflow); //***adder - - cla_sub12 sub1_exp(Texp_subone, Texp, B_12_underflow); //***adder - - // Now that rounding is done, we compute the final exponent - // and test for special cases. - - // Compute the value of the exponent by subtracting the shift - // value from the previous exponent and then adding 2 + cout. - // If needed this could be optimized to used a specialized - // adder. - - assign Texp = DenormIn ? ({1'b0, exponent_postsum}) : ({VSS, Aexp} - {{6{VSS}}, norm_shift} +{{10{VSS}}, VDD, Cout}); - - // Overflow only occurs for double precision, if Texp[10] to Texp[0] are - // all ones. To encourage sharing with single precision overflow detection, - // the lower 7 bits are tested separately. 
- assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0]; - assign OvFlow_DP = Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o; - - // Overflow occurs for single precision if (Texp[10] is one) and - // ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0] - // are all ones. - assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o); - - // Underflow occurs for double precision if (Texp[11] is one) or Texp[10] to - // Texp[0] are all zeros. - assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0]; - assign UnFlow_DP = Texp[11] | ~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z; - - // Underflow occurs for single precision if (Texp[10] is zero) and - // (Texp[9] or Texp[8] or Texp[7]) is zero. - assign UnFlow_SP = (~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z)); - - // Set the overflow and underflow flags. They should not be set if - // the input logic was infinite or NaN or the output logic of the adder is zero. - // 00 = Valid - // 10 = NaN - assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]); - assign NaN = ~sel_inv[2]&~sel_inv[1]& sel_inv[0]; - assign UnderFlow = ((P & UnFlow_SP | UnFlow_DP)&Valid&exp_valid) | - (~Aexp[10]&Aexp[9]&Aexp[8]&Aexp[7]&~Aexp[6] - &~Aexp[5]&~Aexp[4]&~Aexp[3]&~Aexp[2] - &~Aexp[1]&~Aexp[0]&sel_inv[3]); - assign OverFlow = (P & OvFlow_SP | OvFlow_DP)&Valid&~UnderFlow&exp_valid; - - // The DenormIO is set if underflow has occurred or if their was a - // denormalized input logic. - assign DenormIO = DenormIn | UnderFlow; - - // The final result is Inexact if any rounding occurred ((i.e., R or S - // is one), or (if the result overflows ) or (if the result underflows and the - // underflow trap is not enabled)) and (value of the result was not previous set - // by an exception case). - assign Inexact = (R|S|OverFlow|(UnderFlow&~UnEn))&Valid; - - // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0, - // Invlalid. 
- assign Flags = {UnderFlow, VSS, OverFlow, Invalid, Inexact}; - - // Determine the final result. - - // The sign of the final result is one if the result is not zero and - // the sign of A is one, or if the result is zero and the the rounding - // mode is round-to-minus infinity. The final result is zero, if exp_valid - // is zero. If underflow occurs, then the result is set to zero. - // - // For Zero (goes equally for subtraction although - // signs may alter operands sign): - // -0 + -0 = -0 (always) - // +0 + +0 = +0 (always) - // -0 + +0 = +0 (for RN, RZ, RU) - // -0 + +0 = -0 (for RD) - assign Rzero = ~exp_valid | UnderFlow; - assign Rsign = DenormIn ? - ( ~(op_type[1] | op_type[0]) ? - ( (sum[63] & (A_Norm | B_Norm) & (exp_A_unmodified[11] ^ exp_B_unmodified[11])) ? - ~Asign : Asign) - : ( ((A_Norm ^ B_Norm) & (exp_A_unmodified[11] ~^ exp_B_unmodified[11])) ? - (normal_underflow ? ~Asign : Asign) : Asign) - ) - : ( ((Asign&exp_valid | - (sel_inv[2]&~sel_inv[1]&sel_inv[0]&rm[1]&rm[0] | - sel_inv[2]&sel_inv[1]&~sel_inv[0] | - ~exp_valid&rm[1]&rm[0]&~sel_inv[2] | - UnderFlow&rm[1]&rm[0])) & ~sel_inv[3]) | - (Asign & sel_inv[3]) ); - - // The exponent of the final result is zero if the final result is - // zero or a denorm, all ones if the final result is NaN or Infinite - // or overflow occurred and the magnitude of the number is - // not rounded toward from zero, and all ones with an LSB of zero - // if overflow occurred and the magnitude of the number is - // rounded toward zero. If the result is single precision, - // Texp[7] shoud be inverted. When the Overflow trap is enabled (OvEn = 1) - // and overflow occurs and the operation is not conversion, bits 10 and 9 are - // inverted for double precision, and bits 7 and 6 are inverted for single precision. 
- assign Round_zero = ~rm[1]&rm[0] | ~Asign&rm[0] | Asign&rm[1]&~rm[0]; - assign VeryLarge = OverFlow & ~OvEn; - assign Infinite = (VeryLarge & ~Round_zero) | (~sel_inv[2] & sel_inv[1]); - assign Largest = VeryLarge & Round_zero; - assign Adj_exp = OverFlow & OvEn; - assign Rexp[10:1] = ({10{~Valid}} | - {Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], - (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | - {10{VeryLarge}})&{10{~Rzero | NaN}}; - assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest; - - // The denormalized rounded exponent uses the overflow/underflow values - // computed in the fpadd component to round the exponent up or down - // Depending on the operation and the signs of the orignal operands, - // underflow may or may not be needed to round. - assign Rexp_denorm = DenormIn ? - ((~op_type[1] & op_type[0]) ? - ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] == exp_B_unmodified[11])) ? - ( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) - : ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) - : ( ((A_Norm != B_Norm) & (exp_A_unmodified[11] != exp_B_unmodified[11])) ? - ( (normal_overflow == normal_underflow) ? Texp[10:0] : (normal_overflow ? Texp_addone[10:0] : Texp_subone[10:0]) ) - : ( normal_overflow ? Texp_addone[10:0] : Texp[10:0] ) ) - ) : - Rexp; //KEP used to be all of exp_A_unmodified - - // If the result is zero or infinity, the mantissa is all zeros. - // If the result is NaN, the mantissa is 10...0 - // If the result the largest floating point number, the mantissa - // is all ones. Otherwise, the mantissa is not changed. - // If operation is denormalized, take the mantissa directly from - // its normalized value. 
- assign Rmant[51] = Largest | NaN | (Tmant[51]&~Infinite&~Rzero); - assign Rmant[50:0] = {51{Largest}} | (Tmant[50:0]&{51{~Infinite&Valid&~Rzero}}); - - assign ShiftMant = A[51:0]; - - // For single precision, the 8 least significant bits of the exponent - // and 23 most significant bits of the mantissa contain bits used - // for the final result. A double precision result is returned if - // overflow has occurred, the overflow trap is enabled, and a conversion - // is being performed. - - assign Result = DenormIn ? {Rsign, Rexp_denorm, ShiftMant} : (P ? {{32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]} - : {Rsign, Rexp, Rmant}); - -endmodule // rounder - diff --git a/pipelined/src/fpu/rounder_div.sv b/pipelined/src/fpu/rounder_div.sv deleted file mode 100755 index 43045ee57..000000000 --- a/pipelined/src/fpu/rounder_div.sv +++ /dev/null @@ -1,212 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Floating point divider/square root rounder unit (Goldschmidt) -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module rounder_div ( - input logic [1:0] rm, - input logic P, - input logic OvEn, - input logic UnEn, - input logic [12:0] exp_diff, - input logic [2:0] sel_inv, - input logic Invalid, - input logic SignR, - input logic [63:0] Float1, - input logic [63:0] Float2, - input logic XNaNQ, - input logic YNaNQ, - input logic XZeroQ, - input logic YZeroQ, - input logic XInfQ, - input logic YInfQ, - input logic op_type, - input logic [59:0] q1, - input logic [59:0] qm1, - input logic [59:0] qp1, - input logic [59:0] q0, - input logic [59:0] qm0, - input logic [59:0] qp0, - input logic [119:0] regr_out, - - output logic [63:0] Result, - output logic [4:0] Flags - ); - - logic Rsign; - logic [10:0] Rexp; - logic [12:0] Texp; - logic [51:0] Rmant; - logic [59:0] Tmant; - logic [51:0] Smant; - logic Rzero; - logic Gdp, Gsp, G; - logic UnFlow_SP, UnFlow_DP, UnderFlow; - logic OvFlow_SP, OvFlow_DP, OverFlow; - logic Inexact; - logic Round_zero; - logic Infinite; - logic VeryLarge; - logic Largest; - logic Div0; - logic Adj_exp; - logic Valid; - logic NaN; - logic Texp_l7z; - logic Texp_l7o; - logic OvCon; - logic zero_rem; - logic [1:0] mux_mant; - logic sign_rem; - logic [59:0] q, qm, qp; - logic exp_ovf; - - logic [50:0] NaN_out; - logic NaN_Sign_out; - logic Sign_out; - - // Remainder = 0? 
- assign zero_rem = ~(|regr_out); - // Remainder Sign - assign sign_rem = ~regr_out[119]; - // choose correct Guard bit [1,2) or [0,1) - assign Gdp = q1[59] ? q1[6] : q0[6]; - assign Gsp = q1[59] ? q1[35] : q0[35]; - assign G = P ? Gsp : Gdp; - // Selection of Rounding (from logic/switching) - assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) | - (!rm[1]&!rm[0]&G&!sign_rem) | - (SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) | - (!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem); - assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) | - (!rm[1]&rm[0]&!G&!zero_rem&sign_rem) | - (SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem); - - // Which Q? - mux2 #(60) mx1 (q0, q1, q1[59], q); - mux2 #(60) mx2 (qm0, qm1, q1[59], qm); - mux2 #(60) mx3 (qp0, qp1, q1[59], qp); - // Choose Q, Q+1, Q-1 - mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant); - assign Smant = Tmant[58:7]; - // Compute the value of the exponent - // exponent is modified if we choose: - // 1.) we choose any qm0, qp0, q0 (since we shift mant) - // 2.) we choose qp and we overflow (for RU) - assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})}; - assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf}; - - // Overflow only occurs for double precision, if Texp[10] to Texp[0] are - // all ones. To encourage sharing with single precision overflow detection, - // the lower 7 bits are tested separately. - assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0]; - assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o); - - // Overflow occurs for single precision if (Texp[10] is one) and - // ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0] - // are all ones. - assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o); - - // Underflow occurs for double precision if (Texp[11]/Texp[10] is one) or - // Texp[10] to Texp[0] are all zeros. 
- assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0]; - assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z; - - // Underflow occurs for single precision if (Texp[10] is zero) and - // (Texp[9] or Texp[8] or Texp[7]) is zero. - assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z); - - // Set the overflow and underflow flags. They should not be set if - // the input was infinite or NaN or the output of the adder is zero. - // 00 = Valid - // 10 = NaN - assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0]; - assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; - assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid; - assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid; - assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN; - - // The final result is Inexact if any rounding occurred ((i.e., R or S - // is one), or (if the result overflows ) or (if the result underflows and the - // underflow trap is not enabled)) and (value of the result was not previous set - // by an exception case). - assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid; - - // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0, - // Invlalid. - assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid}; - - // Determine sign - assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]); - assign Rsign = SignR; - - // The exponent of the final result is zero if the final result is - // zero or a denorm, all ones if the final result is NaN or Infinite - // or overflow occurred and the magnitude of the number is - // not rounded toward from zero, and all ones with an LSB of zero - // if overflow occurred and the magnitude of the number is - // rounded toward zero. If the result is single precision, - // Texp[7] shoud be inverted. 
When the Overflow trap is enabled (OvEn = 1) - // and overflow occurs and the operation is not conversion, bits 10 and 9 are - // inverted for double precision, and bits 7 and 6 are inverted for single precision. - assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0]; - assign VeryLarge = OverFlow & ~OvEn; - assign Infinite = (VeryLarge & ~Round_zero) | sel_inv[1]; - assign Largest = VeryLarge & Round_zero; - assign Adj_exp = OverFlow & OvEn; - assign Rexp[10:1] = ({10{~Valid}} | - {Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], - (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | - {10{VeryLarge}})&{10{~Rzero | NaN}}; - assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest; - - // If the result is zero or infinity, the mantissa is all zeros. - // If the result is NaN, the mantissa is 10...0 - // If the result the largest floating point number, the mantissa - // is all ones. Otherwise, the mantissa is not changed. - assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0]; - assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63]; - assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | - NaN_Sign_out&(XNaNQ|YNaNQ); - // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1 - // | Float1[63]&op_type; (logic to fix this but removed for now) - - assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero); - assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) | - (NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}}); - - // For single precision, the 8 least significant bits of the exponent - // and 23 most significant bits of the mantissa contain bits used - // for the final result. A double precision result is returned if - // overflow has occurred, the overflow trap is enabled, and a conversion - // is being performed. - assign OvCon = OverFlow & OvEn; - assign Result = (P&~OvCon) ? 
{ {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]} - : {Sign_out, Rexp, Rmant}; - -endmodule // rounder - diff --git a/pipelined/src/fpu/sbtm_div.sv b/pipelined/src/fpu/roundsign.sv similarity index 59% rename from pipelined/src/fpu/sbtm_div.sv rename to pipelined/src/fpu/roundsign.sv index 3daa5accb..55e322bc3 100644 --- a/pipelined/src/fpu/sbtm_div.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -1,9 +1,9 @@ /////////////////////////////////////////// // -// Written: James Stine -// Modified: 8/1/2018 +// Written: me@KatherineParry.com +// Modified: 7/5/2022 // -// Purpose: Bipartite Lookup for divide portion of fpdivsqrt +// Purpose: Sign calculation ofr rounding // // A component of the Wally configurable RISC-V project. // @@ -26,37 +26,35 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// +`include "wally-config.vh" -module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out); +module roundsign( + input logic FmaPs, FmaAs, + input logic FmaInvA, + input logic Xs, + input logic Ys, + input logic FmaNegSum, + input logic FmaOp, + input logic DivOp, + input logic CvtOp, + input logic CvtCs, + output logic Ms +); - // bit partitions - logic [3:0] x0; - logic [2:0] x1; - logic [3:0] x2; - logic [2:0] x2_1cmp; - // mem outputs - logic [12:0] y0; - logic [4:0] y1; - // input to CPA - logic [14:0] op1; - logic [14:0] op2; - logic [14:0] p; - logic cout; + logic FmaResSgnTmp; + logic Qs; - assign x0 = a[10:7]; - assign x1 = a[6:4]; - assign x2 = a[3:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage - sbtm_a0 mem1 ({x0, x1}, y0); - // 1s cmp per sbtm/stam - assign x2_1cmp = x2[3] ? 
~x2[2:0] : x2[2:0]; - sbtm_a1 mem2 ({x0, x2_1cmp}, y1); - assign op1 = {1'b0, y0, 1'b0}; - // 1s cmp per sbtm/stam - assign op2 = x2[3] ? {1'b1, {8{1'b1}}, ~y1, 1'b1} : - {1'b0, 8'b0, y1, 1'b1}; - // CPA - assign {cout, p} = op1 + op2; - assign ia_out = p[14:4]; + // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); -endmodule // sbtm + assign Qs = Xs^Ys; + + // Sign for rounding calulation + assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/sbtm_a0.sv b/pipelined/src/fpu/sbtm_a0.sv deleted file mode 100644 index f06350289..000000000 --- a/pipelined/src/fpu/sbtm_a0.sv +++ /dev/null @@ -1,170 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Bipartite Lookup -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module sbtm_a0 (input logic [6:0] a, - output logic [12:0] y); - - always_comb - case(a) - 7'b0000000: y = 13'b1111111100010; - 7'b0000001: y = 13'b1111110100011; - 7'b0000010: y = 13'b1111101100101; - 7'b0000011: y = 13'b1111100101000; - 7'b0000100: y = 13'b1111011101100; - 7'b0000101: y = 13'b1111010110000; - 7'b0000110: y = 13'b1111001110110; - 7'b0000111: y = 13'b1111000111100; - 7'b0001000: y = 13'b1111000000100; - 7'b0001001: y = 13'b1110111001100; - 7'b0001010: y = 13'b1110110010101; - 7'b0001011: y = 13'b1110101011110; - 7'b0001100: y = 13'b1110100101001; - 7'b0001101: y = 13'b1110011110100; - 7'b0001110: y = 13'b1110011000000; - 7'b0001111: y = 13'b1110010001101; - 7'b0010000: y = 13'b1110001011010; - 7'b0010001: y = 13'b1110000101000; - 7'b0010010: y = 13'b1101111110111; - 7'b0010011: y = 13'b1101111000110; - 7'b0010100: y = 13'b1101110010111; - 7'b0010101: y = 13'b1101101100111; - 7'b0010110: y = 13'b1101100111001; - 7'b0010111: y = 13'b1101100001011; - 7'b0011000: y = 13'b1101011011101; - 7'b0011001: y = 13'b1101010110001; - 7'b0011010: y = 13'b1101010000100; - 7'b0011011: y = 13'b1101001011001; - 7'b0011100: y = 13'b1101000101110; - 7'b0011101: y = 13'b1101000000011; - 7'b0011110: y = 13'b1100111011001; - 7'b0011111: y = 13'b1100110101111; - 7'b0100000: y = 13'b1100110000110; - 7'b0100001: y = 13'b1100101011110; - 7'b0100010: y = 13'b1100100110110; - 7'b0100011: y = 13'b1100100001111; - 7'b0100100: y = 13'b1100011101000; - 7'b0100101: y = 13'b1100011000001; - 7'b0100110: y = 13'b1100010011011; - 7'b0100111: y = 13'b1100001110101; - 7'b0101000: y = 13'b1100001010000; - 7'b0101001: y = 
13'b1100000101011; - 7'b0101010: y = 13'b1100000000111; - 7'b0101011: y = 13'b1011111100011; - 7'b0101100: y = 13'b1011111000000; - 7'b0101101: y = 13'b1011110011101; - 7'b0101110: y = 13'b1011101111010; - 7'b0101111: y = 13'b1011101011000; - 7'b0110000: y = 13'b1011100110110; - 7'b0110001: y = 13'b1011100010101; - 7'b0110010: y = 13'b1011011110011; - 7'b0110011: y = 13'b1011011010011; - 7'b0110100: y = 13'b1011010110010; - 7'b0110101: y = 13'b1011010010010; - 7'b0110110: y = 13'b1011001110011; - 7'b0110111: y = 13'b1011001010011; - 7'b0111000: y = 13'b1011000110100; - 7'b0111001: y = 13'b1011000010110; - 7'b0111010: y = 13'b1010111110111; - 7'b0111011: y = 13'b1010111011001; - 7'b0111100: y = 13'b1010110111100; - 7'b0111101: y = 13'b1010110011110; - 7'b0111110: y = 13'b1010110000001; - 7'b0111111: y = 13'b1010101100100; - 7'b1000000: y = 13'b1010101001000; - 7'b1000001: y = 13'b1010100101100; - 7'b1000010: y = 13'b1010100010000; - 7'b1000011: y = 13'b1010011110100; - 7'b1000100: y = 13'b1010011011001; - 7'b1000101: y = 13'b1010010111110; - 7'b1000110: y = 13'b1010010100011; - 7'b1000111: y = 13'b1010010001001; - 7'b1001000: y = 13'b1010001101111; - 7'b1001001: y = 13'b1010001010101; - 7'b1001010: y = 13'b1010000111011; - 7'b1001011: y = 13'b1010000100001; - 7'b1001100: y = 13'b1010000001000; - 7'b1001101: y = 13'b1001111101111; - 7'b1001110: y = 13'b1001111010111; - 7'b1001111: y = 13'b1001110111110; - 7'b1010000: y = 13'b1001110100110; - 7'b1010001: y = 13'b1001110001110; - 7'b1010010: y = 13'b1001101110110; - 7'b1010011: y = 13'b1001101011111; - 7'b1010100: y = 13'b1001101000111; - 7'b1010101: y = 13'b1001100110000; - 7'b1010110: y = 13'b1001100011001; - 7'b1010111: y = 13'b1001100000010; - 7'b1011000: y = 13'b1001011101100; - 7'b1011001: y = 13'b1001011010110; - 7'b1011010: y = 13'b1001011000000; - 7'b1011011: y = 13'b1001010101010; - 7'b1011100: y = 13'b1001010010100; - 7'b1011101: y = 13'b1001001111111; - 7'b1011110: y = 13'b1001001101001; - 7'b1011111: y = 
13'b1001001010100; - 7'b1100000: y = 13'b1001000111111; - 7'b1100001: y = 13'b1001000101011; - 7'b1100010: y = 13'b1001000010110; - 7'b1100011: y = 13'b1001000000010; - 7'b1100100: y = 13'b1000111101110; - 7'b1100101: y = 13'b1000111011010; - 7'b1100110: y = 13'b1000111000110; - 7'b1100111: y = 13'b1000110110010; - 7'b1101000: y = 13'b1000110011111; - 7'b1101001: y = 13'b1000110001011; - 7'b1101010: y = 13'b1000101111000; - 7'b1101011: y = 13'b1000101100101; - 7'b1101100: y = 13'b1000101010010; - 7'b1101101: y = 13'b1000101000000; - 7'b1101110: y = 13'b1000100101101; - 7'b1101111: y = 13'b1000100011011; - 7'b1110000: y = 13'b1000100001001; - 7'b1110001: y = 13'b1000011110110; - 7'b1110010: y = 13'b1000011100101; - 7'b1110011: y = 13'b1000011010011; - 7'b1110100: y = 13'b1000011000001; - 7'b1110101: y = 13'b1000010110000; - 7'b1110110: y = 13'b1000010011110; - 7'b1110111: y = 13'b1000010001101; - 7'b1111000: y = 13'b1000001111100; - 7'b1111001: y = 13'b1000001101011; - 7'b1111010: y = 13'b1000001011010; - 7'b1111011: y = 13'b1000001001010; - 7'b1111100: y = 13'b1000000111001; - 7'b1111101: y = 13'b1000000101001; - 7'b1111110: y = 13'b1000000011001; - 7'b1111111: y = 13'b1000000001001; - default: y = 13'bxxxxxxxxxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - diff --git a/pipelined/src/fpu/sbtm_a1.sv b/pipelined/src/fpu/sbtm_a1.sv deleted file mode 100644 index 60505b2e9..000000000 --- a/pipelined/src/fpu/sbtm_a1.sv +++ /dev/null @@ -1,170 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Bipartite Lookup -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module sbtm_a1 (input logic [6:0] a, - output logic [4:0] y); - - always_comb - case(a) - 7'b0000000: y = 5'b11100; - 7'b0000001: y = 5'b11000; - 7'b0000010: y = 5'b10100; - 7'b0000011: y = 5'b10000; - 7'b0000100: y = 5'b01101; - 7'b0000101: y = 5'b01001; - 7'b0000110: y = 5'b00101; - 7'b0000111: y = 5'b00001; - 7'b0001000: y = 5'b11001; - 7'b0001001: y = 5'b10101; - 7'b0001010: y = 5'b10010; - 7'b0001011: y = 5'b01111; - 7'b0001100: y = 5'b01011; - 7'b0001101: y = 5'b01000; - 7'b0001110: y = 5'b00101; - 7'b0001111: y = 5'b00001; - 7'b0010000: y = 5'b10110; - 7'b0010001: y = 5'b10011; - 7'b0010010: y = 5'b10000; - 7'b0010011: y = 5'b01101; - 7'b0010100: y = 5'b01010; - 7'b0010101: y = 5'b00111; - 7'b0010110: y = 5'b00100; - 7'b0010111: y = 5'b00001; - 7'b0011000: y = 5'b10100; - 7'b0011001: y = 5'b10001; - 7'b0011010: y = 5'b01110; - 7'b0011011: y = 5'b01100; - 7'b0011100: y = 5'b01001; - 7'b0011101: y = 5'b00110; - 7'b0011110: y = 5'b00100; - 7'b0011111: y = 5'b00001; - 7'b0100000: y = 5'b10010; - 7'b0100001: y = 5'b01111; - 7'b0100010: y = 5'b01101; - 7'b0100011: y = 5'b01010; - 7'b0100100: y = 5'b01000; - 7'b0100101: y = 5'b00110; - 7'b0100110: y = 5'b00011; - 7'b0100111: y = 5'b00001; - 7'b0101000: y = 5'b10000; - 7'b0101001: y = 5'b01110; - 7'b0101010: y = 5'b01100; - 7'b0101011: y = 5'b01001; - 7'b0101100: y = 5'b00111; - 7'b0101101: y = 5'b00101; - 7'b0101110: y = 5'b00011; - 7'b0101111: y = 5'b00001; - 7'b0110000: y = 5'b01111; - 7'b0110001: y = 5'b01101; - 7'b0110010: y = 5'b01011; - 7'b0110011: y = 5'b01001; - 7'b0110100: y = 5'b00111; - 7'b0110101: y = 5'b00101; - 7'b0110110: y = 5'b00011; - 7'b0110111: y = 5'b00001; - 7'b0111000: y = 5'b01101; - 7'b0111001: y = 5'b01100; - 7'b0111010: y = 5'b01010; - 7'b0111011: y = 5'b01000; - 7'b0111100: y = 5'b00110; - 7'b0111101: y = 5'b00100; - 7'b0111110: y = 5'b00010; - 7'b0111111: y = 5'b00000; - 7'b1000000: y = 
5'b01100; - 7'b1000001: y = 5'b01011; - 7'b1000010: y = 5'b01001; - 7'b1000011: y = 5'b00111; - 7'b1000100: y = 5'b00101; - 7'b1000101: y = 5'b00100; - 7'b1000110: y = 5'b00010; - 7'b1000111: y = 5'b00000; - 7'b1001000: y = 5'b01011; - 7'b1001001: y = 5'b01010; - 7'b1001010: y = 5'b01000; - 7'b1001011: y = 5'b00111; - 7'b1001100: y = 5'b00101; - 7'b1001101: y = 5'b00011; - 7'b1001110: y = 5'b00010; - 7'b1001111: y = 5'b00000; - 7'b1010000: y = 5'b01010; - 7'b1010001: y = 5'b01001; - 7'b1010010: y = 5'b01000; - 7'b1010011: y = 5'b00110; - 7'b1010100: y = 5'b00101; - 7'b1010101: y = 5'b00011; - 7'b1010110: y = 5'b00010; - 7'b1010111: y = 5'b00000; - 7'b1011000: y = 5'b01010; - 7'b1011001: y = 5'b01000; - 7'b1011010: y = 5'b00111; - 7'b1011011: y = 5'b00110; - 7'b1011100: y = 5'b00100; - 7'b1011101: y = 5'b00011; - 7'b1011110: y = 5'b00010; - 7'b1011111: y = 5'b00000; - 7'b1100000: y = 5'b01001; - 7'b1100001: y = 5'b01000; - 7'b1100010: y = 5'b00110; - 7'b1100011: y = 5'b00101; - 7'b1100100: y = 5'b00100; - 7'b1100101: y = 5'b00011; - 7'b1100110: y = 5'b00001; - 7'b1100111: y = 5'b00000; - 7'b1101000: y = 5'b01000; - 7'b1101001: y = 5'b00111; - 7'b1101010: y = 5'b00110; - 7'b1101011: y = 5'b00101; - 7'b1101100: y = 5'b00100; - 7'b1101101: y = 5'b00010; - 7'b1101110: y = 5'b00001; - 7'b1101111: y = 5'b00000; - 7'b1110000: y = 5'b01000; - 7'b1110001: y = 5'b00111; - 7'b1110010: y = 5'b00110; - 7'b1110011: y = 5'b00100; - 7'b1110100: y = 5'b00011; - 7'b1110101: y = 5'b00010; - 7'b1110110: y = 5'b00001; - 7'b1110111: y = 5'b00000; - 7'b1111000: y = 5'b00111; - 7'b1111001: y = 5'b00110; - 7'b1111010: y = 5'b00101; - 7'b1111011: y = 5'b00100; - 7'b1111100: y = 5'b00011; - 7'b1111101: y = 5'b00010; - 7'b1111110: y = 5'b00001; - 7'b1111111: y = 5'b00000; - default: y = 5'bxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - diff --git a/pipelined/src/fpu/sbtm_a2.sv b/pipelined/src/fpu/sbtm_a2.sv deleted file mode 100755 index c54a994ea..000000000 --- 
a/pipelined/src/fpu/sbtm_a2.sv +++ /dev/null @@ -1,234 +0,0 @@ -/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Bipartite Lookup -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module sbtm_a2 (input logic [7:0] a, - output logic [13:0] y); - - always_comb - case(a) - 8'b01000000: y = 14'b10110100010111; - 8'b01000001: y = 14'b10110010111111; - 8'b01000010: y = 14'b10110001101000; - 8'b01000011: y = 14'b10110000010011; - 8'b01000100: y = 14'b10101111000001; - 8'b01000101: y = 14'b10101101110000; - 8'b01000110: y = 14'b10101100100001; - 8'b01000111: y = 14'b10101011010011; - 8'b01001000: y = 14'b10101010000111; - 8'b01001001: y = 14'b10101000111101; - 8'b01001010: y = 14'b10100111110100; - 8'b01001011: y = 14'b10100110101101; - 8'b01001100: y = 14'b10100101100111; - 8'b01001101: y = 14'b10100100100010; - 8'b01001110: y = 14'b10100011011111; - 8'b01001111: y = 14'b10100010011101; - 8'b01010000: y = 14'b10100001011100; - 8'b01010001: y = 14'b10100000011100; - 8'b01010010: y = 14'b10011111011110; - 8'b01010011: y = 14'b10011110100001; - 8'b01010100: y = 14'b10011101100100; - 8'b01010101: y = 14'b10011100101001; - 8'b01010110: y = 14'b10011011101111; - 8'b01010111: y = 14'b10011010110110; - 8'b01011000: y = 14'b10011001111110; - 8'b01011001: y = 14'b10011001000110; - 8'b01011010: y = 14'b10011000010000; - 8'b01011011: y = 14'b10010111011011; - 8'b01011100: y = 14'b10010110100110; - 8'b01011101: y = 14'b10010101110011; - 8'b01011110: y = 14'b10010101000000; - 8'b01011111: y = 14'b10010100001110; - 8'b01100000: y = 14'b10010011011100; - 8'b01100001: y = 14'b10010010101100; - 8'b01100010: y = 14'b10010001111100; - 8'b01100011: y = 14'b10010001001101; - 8'b01100100: y = 14'b10010000011111; - 8'b01100101: y = 14'b10001111110001; - 8'b01100110: y = 14'b10001111000100; - 8'b01100111: y = 14'b10001110011000; - 8'b01101000: y = 14'b10001101101100; - 8'b01101001: y = 14'b10001101000001; - 8'b01101010: y = 14'b10001100010110; - 8'b01101011: y = 14'b10001011101100; - 8'b01101100: y = 14'b10001011000011; - 8'b01101101: y = 14'b10001010011010; - 8'b01101110: 
y = 14'b10001001110010; - 8'b01101111: y = 14'b10001001001010; - 8'b01110000: y = 14'b10001000100011; - 8'b01110001: y = 14'b10000111111101; - 8'b01110010: y = 14'b10000111010111; - 8'b01110011: y = 14'b10000110110001; - 8'b01110100: y = 14'b10000110001100; - 8'b01110101: y = 14'b10000101100111; - 8'b01110110: y = 14'b10000101000011; - 8'b01110111: y = 14'b10000100011111; - 8'b01111000: y = 14'b10000011111100; - 8'b01111001: y = 14'b10000011011001; - 8'b01111010: y = 14'b10000010110111; - 8'b01111011: y = 14'b10000010010101; - 8'b01111100: y = 14'b10000001110011; - 8'b01111101: y = 14'b10000001010010; - 8'b01111110: y = 14'b10000000110001; - 8'b01111111: y = 14'b10000000010001; - 8'b10000000: y = 14'b01111111110001; - 8'b10000001: y = 14'b01111111010001; - 8'b10000010: y = 14'b01111110110010; - 8'b10000011: y = 14'b01111110010011; - 8'b10000100: y = 14'b01111101110101; - 8'b10000101: y = 14'b01111101010110; - 8'b10000110: y = 14'b01111100111001; - 8'b10000111: y = 14'b01111100011011; - 8'b10001000: y = 14'b01111011111110; - 8'b10001001: y = 14'b01111011100001; - 8'b10001010: y = 14'b01111011000100; - 8'b10001011: y = 14'b01111010101000; - 8'b10001100: y = 14'b01111010001100; - 8'b10001101: y = 14'b01111001110000; - 8'b10001110: y = 14'b01111001010101; - 8'b10001111: y = 14'b01111000111010; - 8'b10010000: y = 14'b01111000011111; - 8'b10010001: y = 14'b01111000000100; - 8'b10010010: y = 14'b01110111101010; - 8'b10010011: y = 14'b01110111010000; - 8'b10010100: y = 14'b01110110110110; - 8'b10010101: y = 14'b01110110011101; - 8'b10010110: y = 14'b01110110000100; - 8'b10010111: y = 14'b01110101101011; - 8'b10011000: y = 14'b01110101010010; - 8'b10011001: y = 14'b01110100111001; - 8'b10011010: y = 14'b01110100100001; - 8'b10011011: y = 14'b01110100001001; - 8'b10011100: y = 14'b01110011110001; - 8'b10011101: y = 14'b01110011011010; - 8'b10011110: y = 14'b01110011000010; - 8'b10011111: y = 14'b01110010101011; - 8'b10100000: y = 14'b01110010010100; - 8'b10100001: y = 
14'b01110001111110; - 8'b10100010: y = 14'b01110001100111; - 8'b10100011: y = 14'b01110001010001; - 8'b10100100: y = 14'b01110000111011; - 8'b10100101: y = 14'b01110000100101; - 8'b10100110: y = 14'b01110000001111; - 8'b10100111: y = 14'b01101111111010; - 8'b10101000: y = 14'b01101111100101; - 8'b10101001: y = 14'b01101111010000; - 8'b10101010: y = 14'b01101110111011; - 8'b10101011: y = 14'b01101110100110; - 8'b10101100: y = 14'b01101110010001; - 8'b10101101: y = 14'b01101101111101; - 8'b10101110: y = 14'b01101101101001; - 8'b10101111: y = 14'b01101101010101; - 8'b10110000: y = 14'b01101101000001; - 8'b10110001: y = 14'b01101100101101; - 8'b10110010: y = 14'b01101100011010; - 8'b10110011: y = 14'b01101100000110; - 8'b10110100: y = 14'b01101011110011; - 8'b10110101: y = 14'b01101011100000; - 8'b10110110: y = 14'b01101011001101; - 8'b10110111: y = 14'b01101010111010; - 8'b10111000: y = 14'b01101010101000; - 8'b10111001: y = 14'b01101010010101; - 8'b10111010: y = 14'b01101010000011; - 8'b10111011: y = 14'b01101001110001; - 8'b10111100: y = 14'b01101001011111; - 8'b10111101: y = 14'b01101001001101; - 8'b10111110: y = 14'b01101000111100; - 8'b10111111: y = 14'b01101000101010; - 8'b11000000: y = 14'b01101000011001; - 8'b11000001: y = 14'b01101000000111; - 8'b11000010: y = 14'b01100111110110; - 8'b11000011: y = 14'b01100111100101; - 8'b11000100: y = 14'b01100111010100; - 8'b11000101: y = 14'b01100111000011; - 8'b11000110: y = 14'b01100110110011; - 8'b11000111: y = 14'b01100110100010; - 8'b11001000: y = 14'b01100110010010; - 8'b11001001: y = 14'b01100110000010; - 8'b11001010: y = 14'b01100101110010; - 8'b11001011: y = 14'b01100101100001; - 8'b11001100: y = 14'b01100101010010; - 8'b11001101: y = 14'b01100101000010; - 8'b11001110: y = 14'b01100100110010; - 8'b11001111: y = 14'b01100100100011; - 8'b11010000: y = 14'b01100100010011; - 8'b11010001: y = 14'b01100100000100; - 8'b11010010: y = 14'b01100011110101; - 8'b11010011: y = 14'b01100011100101; - 8'b11010100: y = 
14'b01100011010110; - 8'b11010101: y = 14'b01100011000111; - 8'b11010110: y = 14'b01100010111001; - 8'b11010111: y = 14'b01100010101010; - 8'b11011000: y = 14'b01100010011011; - 8'b11011001: y = 14'b01100010001101; - 8'b11011010: y = 14'b01100001111110; - 8'b11011011: y = 14'b01100001110000; - 8'b11011100: y = 14'b01100001100010; - 8'b11011101: y = 14'b01100001010100; - 8'b11011110: y = 14'b01100001000110; - 8'b11011111: y = 14'b01100000111000; - 8'b11100000: y = 14'b01100000101010; - 8'b11100001: y = 14'b01100000011100; - 8'b11100010: y = 14'b01100000001111; - 8'b11100011: y = 14'b01100000000001; - 8'b11100100: y = 14'b01011111110100; - 8'b11100101: y = 14'b01011111100110; - 8'b11100110: y = 14'b01011111011001; - 8'b11100111: y = 14'b01011111001100; - 8'b11101000: y = 14'b01011110111111; - 8'b11101001: y = 14'b01011110110010; - 8'b11101010: y = 14'b01011110100101; - 8'b11101011: y = 14'b01011110011000; - 8'b11101100: y = 14'b01011110001011; - 8'b11101101: y = 14'b01011101111110; - 8'b11101110: y = 14'b01011101110010; - 8'b11101111: y = 14'b01011101100101; - 8'b11110000: y = 14'b01011101011001; - 8'b11110001: y = 14'b01011101001100; - 8'b11110010: y = 14'b01011101000000; - 8'b11110011: y = 14'b01011100110100; - 8'b11110100: y = 14'b01011100101000; - 8'b11110101: y = 14'b01011100011100; - 8'b11110110: y = 14'b01011100010000; - 8'b11110111: y = 14'b01011100000100; - 8'b11111000: y = 14'b01011011111000; - 8'b11111001: y = 14'b01011011101100; - 8'b11111010: y = 14'b01011011100000; - 8'b11111011: y = 14'b01011011010101; - 8'b11111100: y = 14'b01011011001001; - 8'b11111101: y = 14'b01011010111101; - 8'b11111110: y = 14'b01011010110010; - 8'b11111111: y = 14'b01011010100111; - default: y = 14'bxxxxxxxxxxxxxx; - endcase // case (a) - -endmodule // sbtm_a0 - - - - diff --git a/pipelined/src/fpu/sbtm_a3.sv b/pipelined/src/fpu/sbtm_a3.sv deleted file mode 100755 index 0f3c6e647..000000000 --- a/pipelined/src/fpu/sbtm_a3.sv +++ /dev/null @@ -1,230 +0,0 @@ 
-/////////////////////////////////////////// -// -// Written: James Stine -// Modified: 8/1/2018 -// -// Purpose: Bipartite Lookup -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module sbtm_a3 (input logic [7:0] a, - output logic [5:0] y); - - always_comb - case(a) - 8'b01000000: y = 6'b100110; - 8'b01000001: y = 6'b100001; - 8'b01000010: y = 6'b011100; - 8'b01000011: y = 6'b010111; - 8'b01000100: y = 6'b010010; - 8'b01000101: y = 6'b001100; - 8'b01000110: y = 6'b000111; - 8'b01000111: y = 6'b000010; - 8'b01001000: y = 6'b100000; - 8'b01001001: y = 6'b011100; - 8'b01001010: y = 6'b011000; - 8'b01001011: y = 6'b010011; - 8'b01001100: y = 6'b001111; - 8'b01001101: y = 6'b001010; - 8'b01001110: y = 6'b000110; - 8'b01001111: y = 6'b000010; - 8'b01010000: y = 6'b011100; - 8'b01010001: y = 6'b011000; - 8'b01010010: y = 6'b010100; - 8'b01010011: y = 6'b010000; - 8'b01010100: y = 6'b001101; - 8'b01010101: y = 6'b001001; - 8'b01010110: y = 6'b000101; - 8'b01010111: y = 6'b000001; - 8'b01011000: y = 6'b011000; - 8'b01011001: y = 6'b010101; - 8'b01011010: y = 6'b010010; - 8'b01011011: y = 6'b001110; - 8'b01011100: y = 6'b001011; - 8'b01011101: y = 6'b001000; - 8'b01011110: y = 6'b000100; - 8'b01011111: y = 6'b000001; - 8'b01100000: y = 6'b010101; - 8'b01100001: y = 6'b010010; - 8'b01100010: y = 6'b001111; - 8'b01100011: y = 6'b001101; - 8'b01100100: y = 6'b001010; - 8'b01100101: y = 6'b000111; - 8'b01100110: y = 6'b000100; - 8'b01100111: y = 6'b000001; - 8'b01101000: y = 6'b010011; - 8'b01101001: y = 6'b010000; - 8'b01101010: y = 6'b001110; - 8'b01101011: y = 6'b001011; - 8'b01101100: y = 6'b001001; - 8'b01101101: y = 6'b000110; - 8'b01101110: y = 6'b000011; - 8'b01101111: y = 6'b000001; - 8'b01110000: y = 6'b010001; - 8'b01110001: y = 6'b001111; - 8'b01110010: y = 6'b001100; - 8'b01110011: y = 6'b001010; - 8'b01110100: y = 6'b001000; - 8'b01110101: y = 6'b000101; - 8'b01110110: y = 6'b000011; - 8'b01110111: y = 6'b000001; - 8'b01111000: y = 6'b001111; - 8'b01111001: y = 6'b001101; - 8'b01111010: y = 6'b001011; - 8'b01111011: y = 6'b001001; - 
8'b01111100: y = 6'b000111; - 8'b01111101: y = 6'b000101; - 8'b01111110: y = 6'b000011; - 8'b01111111: y = 6'b000001; - 8'b10000000: y = 6'b001110; - 8'b10000001: y = 6'b001100; - 8'b10000010: y = 6'b001010; - 8'b10000011: y = 6'b001000; - 8'b10000100: y = 6'b000110; - 8'b10000101: y = 6'b000100; - 8'b10000110: y = 6'b000010; - 8'b10000111: y = 6'b000000; - 8'b10001000: y = 6'b001101; - 8'b10001001: y = 6'b001011; - 8'b10001010: y = 6'b001001; - 8'b10001011: y = 6'b000111; - 8'b10001100: y = 6'b000110; - 8'b10001101: y = 6'b000100; - 8'b10001110: y = 6'b000010; - 8'b10001111: y = 6'b000000; - 8'b10010000: y = 6'b001100; - 8'b10010001: y = 6'b001010; - 8'b10010010: y = 6'b001000; - 8'b10010011: y = 6'b000111; - 8'b10010100: y = 6'b000101; - 8'b10010101: y = 6'b000100; - 8'b10010110: y = 6'b000010; - 8'b10010111: y = 6'b000000; - 8'b10011000: y = 6'b001011; - 8'b10011001: y = 6'b001001; - 8'b10011010: y = 6'b001000; - 8'b10011011: y = 6'b000110; - 8'b10011100: y = 6'b000101; - 8'b10011101: y = 6'b000011; - 8'b10011110: y = 6'b000010; - 8'b10011111: y = 6'b000000; - 8'b10100000: y = 6'b001010; - 8'b10100001: y = 6'b001000; - 8'b10100010: y = 6'b000111; - 8'b10100011: y = 6'b000110; - 8'b10100100: y = 6'b000100; - 8'b10100101: y = 6'b000011; - 8'b10100110: y = 6'b000010; - 8'b10100111: y = 6'b000000; - 8'b10101000: y = 6'b001001; - 8'b10101001: y = 6'b001000; - 8'b10101010: y = 6'b000111; - 8'b10101011: y = 6'b000101; - 8'b10101100: y = 6'b000100; - 8'b10101101: y = 6'b000011; - 8'b10101110: y = 6'b000001; - 8'b10101111: y = 6'b000000; - 8'b10110000: y = 6'b001000; - 8'b10110001: y = 6'b000111; - 8'b10110010: y = 6'b000110; - 8'b10110011: y = 6'b000101; - 8'b10110100: y = 6'b000100; - 8'b10110101: y = 6'b000010; - 8'b10110110: y = 6'b000001; - 8'b10110111: y = 6'b000000; - 8'b10111000: y = 6'b001000; - 8'b10111001: y = 6'b000111; - 8'b10111010: y = 6'b000110; - 8'b10111011: y = 6'b000101; - 8'b10111100: y = 6'b000011; - 8'b10111101: y = 6'b000010; - 8'b10111110: y = 
6'b000001; - 8'b10111111: y = 6'b000000; - 8'b11000000: y = 6'b000111; - 8'b11000001: y = 6'b000110; - 8'b11000010: y = 6'b000101; - 8'b11000011: y = 6'b000100; - 8'b11000100: y = 6'b000011; - 8'b11000101: y = 6'b000010; - 8'b11000110: y = 6'b000001; - 8'b11000111: y = 6'b000000; - 8'b11001000: y = 6'b000111; - 8'b11001001: y = 6'b000110; - 8'b11001010: y = 6'b000101; - 8'b11001011: y = 6'b000100; - 8'b11001100: y = 6'b000011; - 8'b11001101: y = 6'b000010; - 8'b11001110: y = 6'b000001; - 8'b11001111: y = 6'b000000; - 8'b11010000: y = 6'b000111; - 8'b11010001: y = 6'b000110; - 8'b11010010: y = 6'b000101; - 8'b11010011: y = 6'b000100; - 8'b11010100: y = 6'b000011; - 8'b11010101: y = 6'b000010; - 8'b11010110: y = 6'b000001; - 8'b11010111: y = 6'b000000; - 8'b11011000: y = 6'b000110; - 8'b11011001: y = 6'b000101; - 8'b11011010: y = 6'b000100; - 8'b11011011: y = 6'b000011; - 8'b11011100: y = 6'b000011; - 8'b11011101: y = 6'b000010; - 8'b11011110: y = 6'b000001; - 8'b11011111: y = 6'b000000; - 8'b11100000: y = 6'b000110; - 8'b11100001: y = 6'b000101; - 8'b11100010: y = 6'b000100; - 8'b11100011: y = 6'b000011; - 8'b11100100: y = 6'b000010; - 8'b11100101: y = 6'b000010; - 8'b11100110: y = 6'b000001; - 8'b11100111: y = 6'b000000; - 8'b11101000: y = 6'b000101; - 8'b11101001: y = 6'b000101; - 8'b11101010: y = 6'b000100; - 8'b11101011: y = 6'b000011; - 8'b11101100: y = 6'b000010; - 8'b11101101: y = 6'b000001; - 8'b11101110: y = 6'b000001; - 8'b11101111: y = 6'b000000; - 8'b11110000: y = 6'b000101; - 8'b11110001: y = 6'b000100; - 8'b11110010: y = 6'b000100; - 8'b11110011: y = 6'b000011; - 8'b11110100: y = 6'b000010; - 8'b11110101: y = 6'b000001; - 8'b11110110: y = 6'b000001; - 8'b11110111: y = 6'b000000; - 8'b11111000: y = 6'b000101; - 8'b11111001: y = 6'b000100; - 8'b11111010: y = 6'b000011; - 8'b11111011: y = 6'b000011; - 8'b11111100: y = 6'b000010; - 8'b11111101: y = 6'b000001; - 8'b11111110: y = 6'b000001; - 8'b11111111: y = 6'b000000; - default: y = 6'bxxxxxx; - endcase // 
case (a) - -endmodule // sbtm_a0 diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv new file mode 100644 index 000000000..71a2393a6 --- /dev/null +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -0,0 +1,68 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: shift correction +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// +`include "wally-config.vh" + +module shiftcorrection( + input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction + input logic FmaOp, + input logic DivOp, + input logic DivResDenorm, + input logic [`NE+1:0] DivQe, + input logic [`NE+1:0] DivDenormShift, + input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection + input logic FmaSZero, + output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [`NE+1:0] Qe, + output logic [`NE+1:0] FmaSe // exponent of the normalized sum +); + logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction + logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; + logic ResDenorm; // is the result denormalized + logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction + + // LZA correction + assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2]; + assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1]; + // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone + assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; + // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) + assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits + assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? 
CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + // Determine sum's exponent + // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 + assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + // recalculate if the result is denormalized + assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; + + // the quotent is in the range [.5,2) if there is no early termination + // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift + assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2}; +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shifter_denorm.sv b/pipelined/src/fpu/shifter_denorm.sv deleted file mode 100755 index b354433f9..000000000 --- a/pipelined/src/fpu/shifter_denorm.sv +++ /dev/null @@ -1,164 +0,0 @@ - -// MJS - This module implements a 57-bit 2-to-1 multiplexor, which is -// used in the barrel shifter for significand alignment. - -module mux21x57 (Z, A, B, Sel); - - input [56:0] A; - input [56:0] B; - input Sel; - - output [56:0] Z; - - assign Z = Sel ? B : A; - -endmodule // mux21x57 - -// MJS - This module implements a 64-bit 2-to-1 multiplexor, which is -// used in the barrel shifter for significand normalization. - -module mux21x64 (Z, A, B, Sel); - - input [63:0] A; - input [63:0] B; - input Sel; - - output [63:0] Z; - - assign Z = Sel ? B : A; - -endmodule // mux21x64 - -// The implementation of the barrel shifter was modified to use -// fewer gates. It is now implemented using six 64-bit 2-to-1 muxes. The -// barrel shifter takes a 64-bit input A and shifts it left by up to -// 63-bits, as specified by Shift, to produce a 63-bit output Z. -// Bits to the right are filled with zeros. 
-// The 64 bit shift is implemented using 6 stages of shifts of 32 -// 16, 8, 4, 2, and 1 bit shifts. - -module barrel_shifter_l64 (Z, A, Shift); - - input [63:0] A; - input [5:0] Shift; - - wire [63:0] stage1; - wire [63:0] stage2; - wire [63:0] stage3; - wire [63:0] stage4; - wire [63:0] stage5; - wire [31:0] thirtytwozeros = 32'h0; - wire [15:0] sixteenzeros = 16'h0; - wire [ 7:0] eightzeros = 8'h0; - wire [ 3:0] fourzeros = 4'h0; - wire [ 1:0] twozeros = 2'b00; - wire onezero = 1'b0; - - output [63:0] Z; - - mux21x64 mx01(stage1, A, {A[31:0], thirtytwozeros}, Shift[5]); - mux21x64 mx02(stage2, stage1, {stage1[47:0], sixteenzeros}, Shift[4]); - mux21x64 mx03(stage3, stage2, {stage2[55:0], eightzeros}, Shift[3]); - mux21x64 mx04(stage4, stage3, {stage3[59:0], fourzeros}, Shift[2]); - mux21x64 mx05(stage5, stage4, {stage4[61:0], twozeros}, Shift[1]); - mux21x64 mx06(Z , stage5, {stage5[62:0], onezero}, Shift[0]); - -endmodule // barrel_shifter_l63 - -// The implementation of the barrel shifter was modified to use -// fewer gates. It is now implemented using six 57-bit 2-to-1 muxes. The -// barrel shifter takes a 57-bit input A and right shifts it by up to -// 63-bits, as specified by Shift, to produce a 57-bit output Z. -// It also computes a Sticky bit, which is set to -// one if any of the bits that were shifted out was one. -// Bits shifted into the left are filled with zeros. -// The 63 bit shift is implemented using 6 stages of shifts of 32 -// 16, 8, 4, 2, and 1 bits. 
- -module barrel_shifter_r57 (Z, Sticky, A, Shift); - - input [56:0] A; - input [5:0] Shift; - - output Sticky; - output [56:0] Z; - - wire [56:0] stage1; - wire [56:0] stage2; - wire [56:0] stage3; - wire [56:0] stage4; - wire [56:0] stage5; - wire [62:0] sixtythreezeros = 63'h0; - wire [31:0] thirtytwozeros = 32'h0; - wire [15:0] sixteenzeros = 16'h0; - wire [ 7:0] eightzeros = 8'h0; - wire [ 3:0] fourzeros = 4'h0; - wire [ 1:0] twozeros = 2'b00; - wire onezero = 1'b0; - wire [62:0] S; - - // Shift operations - mux21x57 mx01(stage1, A, {thirtytwozeros, A[56:32]}, Shift[5]); - mux21x57 mx02(stage2, stage1, {sixteenzeros, stage1[56:16]}, Shift[4]); - mux21x57 mx03(stage3, stage2, {eightzeros, stage2[56:8]}, Shift[3]); - mux21x57 mx04(stage4, stage3, {fourzeros, stage3[56:4]}, Shift[2]); - mux21x57 mx05(stage5, stage4, {twozeros, stage4[56:2]}, Shift[1]); - mux21x57 mx06(Z , stage5, {onezero, stage5[56:1]}, Shift[0]); - - // Sticky bit calculation. The Sticky bit is set to one if any of the - // bits that were shifter out were one - - assign S[31:0] = {32{Shift[5]}} & A[31:0]; - assign S[47:32] = {16{Shift[4]}} & stage1[15:0]; - assign S[55:48] = { 8{Shift[3]}} & stage2[7:0]; - assign S[59:56] = { 4{Shift[2]}} & stage3[3:0]; - assign S[61:60] = { 2{Shift[1]}} & stage4[1:0]; - assign S[62] = Shift[0] & stage5[0]; - assign Sticky = (S != sixtythreezeros); - -endmodule // barrel_shifter_r57 - -/* -module barrel_shifter_r64 (Z, Sticky, A, Shift); - - input [63:0] A; - input [5:0] Shift; - - output Sticky; - output [63:0] Z; - - wire [63:0] stage1; - wire [63:0] stage2; - wire [63:0] stage3; - wire [63:0] stage4; - wire [63:0] stage5; - wire [62:0] sixtythreezeros = 63'h0; - wire [31:0] thirtytwozeros = 32'h0; - wire [15:0] sixteenzeros = 16'h0; - wire [ 7:0] eightzeros = 8'h0; - wire [ 3:0] fourzeros = 4'h0; - wire [ 1:0] twozeros = 2'b00; - wire onezero = 1'b0; - wire [62:0] S; - - // Shift operations - mux21x64 mx01(stage1, A, {thirtytwozeros, A[63:32]}, Shift[5]); - 
mux21x64 mx02(stage2, stage1, {sixteenzeros, stage1[63:16]}, Shift[4]); - mux21x64 mx03(stage3, stage2, {eightzeros, stage2[63:8]}, Shift[3]); - mux21x64 mx04(stage4, stage3, {fourzeros, stage3[63:4]}, Shift[2]); - mux21x64 mx05(stage5, stage4, {twozeros, stage4[63:2]}, Shift[1]); - mux21x64 mx06(Z , stage5, {onezero, stage5[63:1]}, Shift[0]); - - // Sticky bit calculation. The Sticky bit is set to one if any of the - // bits that were shifter out were one - - assign S[31:0] = {32{Shift[5]}} & A[31:0]; - assign S[47:32] = {16{Shift[4]}} & stage1[15:0]; - assign S[55:48] = { 8{Shift[3]}} & stage2[7:0]; - assign S[59:56] = { 4{Shift[2]}} & stage3[3:0]; - assign S[61:60] = { 2{Shift[1]}} & stage4[1:0]; - assign S[62] = Shift[0] & stage5[0]; - assign Sticky = (S != sixtythreezeros); - -endmodule // barrel_shifter_r64 -*/ \ No newline at end of file diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv new file mode 100644 index 000000000..3c28eae2e --- /dev/null +++ b/pipelined/src/fpu/specialcase.sv @@ -0,0 +1,290 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: special case selection +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module specialcase( + input logic Xs, // input signs + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic XNaN, YNaN, ZNaN, // inputs are NaN + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic InfIn, + input logic NaNIn, + input logic XInf, YInf, + input logic XZero, + input logic IntZero, + input logic IntToFp, + input logic Int64, + input logic Signed, + input logic CvtOp, + input logic DivOp, + input logic FmaOp, + input logic Plus1, + input logic DivByZero, + input logic [`NE:0] CvtCe, // the calculated expoent + input logic Ws, // the res's sign + input logic IntInvalid, Invalid, Overflow, // flags + input logic CvtResUf, + input logic [`NE-1:0] Re, // Res exponent + input logic [`NE+1:0] FullRe, // Res exponent + input logic [`NF-1:0] Rf, // Res fraction + input logic [`XLEN+1:0] CvtNegRes, // the negation of the result + output logic [`FLEN-1:0] PostProcRes, // final res + output logic [`XLEN-1:0] FCvtIntRes // final res +); + logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results + logic OfResMax; + logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output + logic KillRes; + logic 
SelOfRes; + + + // does the overflow result output the maximum normalized floating point number + // output infinity if the input is infinity + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); + + if (`FPSIZES == 1) begin + + //NaN res selection depending on standard + if(`IEEE754) begin + assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = {Ws, Re, Rf}; + + end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? + if(`IEEE754) begin + assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + + assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : + OfResMax ? 
{{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + `FMT1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + OfRes = OfResMax ? 
{{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + end + `FMT2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; + ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + end + default: begin + if(`IEEE754) begin + XNaNRes = (`FLEN)'(0); + YNaNRes = (`FLEN)'(0); + ZNaNRes = (`FLEN)'(0); + InvalidRes = (`FLEN)'(0); + end else begin + InvalidRes = (`FLEN)'(0); + end + OfRes = (`FLEN)'(0); + UfRes = (`FLEN)'(0); + NormRes = (`FLEN)'(0); + end + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? 
{Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + 2'h1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; + ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + end + 2'h0: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; + ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end + + OfRes = OfResMax ? 
{{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + end + 2'h2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; + ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; + // zero is exact fi dividing by infinity so don't add 1 + UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + end + endcase + + end + + + + + + // determine if you shoould kill the res - Cvt + // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 + // - dont set to zero if fp input is zero but not using the fp input + // - dont set to zero if int input is zero but not using the int input + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); + assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); + // output infinity with result sign if divide by zero + if(`IEEE754) begin + assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : + YNaN&~CvtOp ? YNaNRes : + ZNaN&FmaOp ? ZNaNRes : + Invalid ? InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end else begin + assign PostProcRes = NaNIn|Invalid ? 
InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end + + /////////////////////////////////////////////////////////////////////////////////////// + // + // ||||||||||| ||| ||| ||||||||||||| + // ||| |||||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| |||||| ||| + // ||||||||||| ||| ||| ||| + // + /////////////////////////////////////////////////////////////////////////////////////// + + // *** probably can optimize the negation + // select the overflow integer res + // - negitive infinity and out of range negitive input + // | int | long | + // signed | -2^31 | -2^63 | + // unsigned | 0 | 0 | + // + // - positive infinity and out of range positive input and NaNs + // | int | long | + // signed | 2^31-1 | 2^63-1 | + // unsigned | 2^32-1 | 2^64-1 | + // + // other: 32 bit unsinged res should be sign extended as if it were a signed number + assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive + Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive + Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive + {`XLEN{1'b1}};// unsigned positive + + + // select the integer output + // - if the input is invalid (out of bounds NaN or Inf) then output overflow res + // - if the input underflows + // - if rounding and signed opperation and negitive input, output -1 + // - otherwise output a rounded 0 + // - otherwise output the normal res (trmined and sign extended if nessisary) + assign FCvtIntRes = IntInvalid ? OfIntRes : + CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? + Int64 ? 
CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv new file mode 100644 index 000000000..9e0315113 --- /dev/null +++ b/pipelined/src/fpu/srt.sv @@ -0,0 +1,259 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module srt( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, YZeroE, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Quot, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, + output logic [`DIVLEN+3:0] StickyWSA, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, + output logic [`NE+1:0] DivCalcExpM, + output logic [`XLEN-1:0] Rem +); + + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; + logic [`NE+1:0] DivCalcExp; + logic [$clog2(`XLEN+1)-1:0] intExp; + logic intSign; + logic [`QLEN-1:0] QMMux; + + // Top Muxes and Registers + // When start is asserted, the inputs are loaded into the divider. + // Otherwise, the divisor is retained and the partial remainder + // is fed back for the next iteration. 
+ // - when the start signal is asserted X and 0 are loaded into WS and WC + // - otherwise load WSA into the flipflop + // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) + // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized + if (`RADIX == 2) begin : nextw + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + end else begin + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + end + + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); + flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + + + // Divisor Selections + // - choose the negitive version of what's being selected + assign DBar = ~D; + if(`RADIX == 4) begin : d2 + assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; + assign D2 = {D[`DIVLEN+2:0], 1'b0}; + end + + genvar i; + generate + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations + divinteration divinteration(.D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<(`DIVCOPIES-1)) begin + if (`RADIX==2)begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0}; + end else begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + end + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate + + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, 
DivStart, QMMux); + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); + + assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; + if(`RADIX==2) + if (`DIVCOPIES == 1) + assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; + else + assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; + + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + +endmodule + +//////////////// +// Submodules // +//////////////// + + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + logic qp, qz;//, qn; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + if(`RADIX == 2) begin : qsel + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); + end else begin + qsel4 qsel4(.D, .WS, .WC, .q); + end + + if(`RADIX == 2) begin : dsel + assign Dsel = {`DIVLEN+4{~qz}}&(qp ? 
DBar : D); + end else begin + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase + end + // Partial Product Generation + // WSA, WCA = WS + WC - qD + if (`RADIX == 2) begin : csa + csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + end else begin + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + end + + if (`RADIX == 2) begin : otfc + otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); + end else begin + otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + end + +endmodule + + +///////// +// csa // +///////// +module csa #(parameter N=69) ( + input logic [N-1:0] in1, in2, in3, + input logic cin, + output logic [N-1:0] out1, out2 +); + + // This block adds in1, in2, in3, and cin to produce + // a result out1 / out2 in carry-save redundant form. + // cin is just added to the least significant bit and + // is Startuired to handle adding a negative divisor. + // Fortunately, the carry (out2) is shifted left by one + // bit, leaving room in the least significant bit to + // insert cin. + + assign out1 = in1 ^ in2 ^ in3; + assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | + (in2[N-2:0] & in3[N-2:0]), cin}; +endmodule + +module expcalc( + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`NE+1:0] DivCalcExp + ); + logic [`NE-2:0] Bias; + + if (`FPSIZES == 1) begin + assign Bias = (`NE-1)'(`BIAS); + + end else if (`FPSIZES == 2) begin + assign Bias = FmtE ? 
(`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + + end else if (`FPSIZES == 3) begin + always_comb + case (FmtE) + `FMT: Bias = (`NE-1)'(`BIAS); + `FMT1: Bias = (`NE-1)'(`BIAS1); + `FMT2: Bias = (`NE-1)'(`BIAS2); + default: Bias = 'x; + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (FmtE) + 2'h3: Bias = (`NE-1)'(`Q_BIAS); + 2'h1: Bias = (`NE-1)'(`D_BIAS); + 2'h0: Bias = (`NE-1)'(`S_BIAS); + 2'h2: Bias = (`NE-1)'(`H_BIAS); + endcase + end + // correct exponent for denormalized input's normalization shifts + assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv new file mode 100644 index 000000000..634ecc1d3 --- /dev/null +++ b/pipelined/src/fpu/srtfsm.sv @@ -0,0 +1,93 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module srtfsm( + input logic clk, + input logic reset, + input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic DivStart, + input logic StallE, + input logic StallM, + input logic [`DIVLEN+3:0] StickyWSA, + input logic [`DURLEN-1:0] Dur, + output logic [`DURLEN-1:0] EarlyTermShiftE, + output logic DivStickyE, + output logic DivDone, + output logic NegSticky, + output logic DivBusy + ); + + typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; + statetype state; + + logic [`DURLEN-1:0] step; + logic WZero; + //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DIVLEN+3:0] W; + + //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); + assign DivBusy = (state == BUSY); + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + // calculate sticky bit + // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result + // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant + // radix-4 division can't create a QM that continually adds 0's + if (`RADIX == 2) + assign DivStickyE = |W&~(StickyWSA == WS); + else + assign DivStickyE = |W; + assign DivDone = (state == DONE); + assign W = WC+WS; + assign NegSticky = W[`DIVLEN+3]; //*** is there a better way 
to do this??? + assign EarlyTermShiftE = step; + + always_ff @(posedge clk) begin + if (reset) begin + state <= #1 IDLE; + end else if (DivStart&~StallE) begin + step <= Dur; + if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; + else state <= #1 BUSY; + end else if (state == BUSY) begin + if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin + state <= #1 DONE; + end + step <= step - 1; + end else if (state == DONE) begin + if (StallM) state <= #1 DONE; + else state <= #1 IDLE; + end + end +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv new file mode 100644 index 000000000..b9fb8bb82 --- /dev/null +++ b/pipelined/src/fpu/srtpreproc.sv @@ -0,0 +1,81 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module srtpreproc ( + input logic [`NF:0] Xm, Ym, + output logic [`DIVLEN-1:0] X, + output logic [`DIVLEN-1:0] Dpreproc, + output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`DURLEN-1:0] Dur +); + // logic [`XLEN-1:0] PosA, PosB; + // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + logic [`DIVLEN-1:0] PreprocA, PreprocX; + logic [`DIVLEN-1:0] PreprocB, PreprocY; + + // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; + // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // lzc #(`XLEN) lzcA (PosA, zeroCntA); + // lzc #(`XLEN) lzcB (PosB, zeroCntB); + + // ***can probably merge X LZC with conversion + // cout the number of leading zeros + lzc #(`NF+1) lzcA (Xm, XZeroCnt); + lzc #(`NF+1) lzcB (Ym, YZeroCnt); + + // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; + + // assign PreprocA = ExtraA << zeroCntA; + // assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocX = {Xm[`NF-1:0]<`XLEN) + mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + else + assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, + 
.ByteMask(ByteMaskM), .WordCount, .FStore2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -268,10 +274,10 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), - .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]), .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]), + .LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register @@ -285,10 +291,10 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// if (`BIGENDIAN_SUPPORTED) begin:endian - bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM)); + bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); end else begin - assign FinalWriteDataM = LittleEndianWriteDataM; + assign IEUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordM; end diff --git a/pipelined/src/mmu/pmpadrdec.sv b/pipelined/src/mmu/pmpadrdec.sv index f895ce77f..fc65a68c6 100644 --- a/pipelined/src/mmu/pmpadrdec.sv +++ b/pipelined/src/mmu/pmpadrdec.sv @@ -39,7 +39,6 @@ module pmpadrdec ( input logic [7:0] PMPCfg, input logic [`XLEN-1:0] PMPAdr, input logic PAgePMPAdrIn, - input logic 
FirstMatch, output logic PAgePMPAdrOut, output logic Match, Active, output logic L, X, W, R @@ -83,10 +82,10 @@ module pmpadrdec ( (AdrMode == NA4 | AdrMode == NAPOT) ? NAMatch : 0; - assign L = PMPCfg[7] & FirstMatch; - assign X = PMPCfg[2] & FirstMatch; - assign W = PMPCfg[1] & FirstMatch; - assign R = PMPCfg[0] & FirstMatch; + assign L = PMPCfg[7]; + assign X = PMPCfg[2]; + assign W = PMPCfg[1]; + assign R = PMPCfg[0]; assign Active = |PMPCfg[4:3]; endmodule diff --git a/pipelined/src/mmu/pmpchecker.sv b/pipelined/src/mmu/pmpchecker.sv index 1bf855072..33de9b30b 100644 --- a/pipelined/src/mmu/pmpchecker.sv +++ b/pipelined/src/mmu/pmpchecker.sv @@ -67,16 +67,16 @@ module pmpchecker ( .PMPAdr(PMPADDR_ARRAY_REGW), .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}), .PAgePMPAdrOut(PAgePMPAdr), - .FirstMatch, .Match, .Active, .L, .X, .W, .R); + .Match, .Active, .L, .X, .W, .R); priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches. // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region - assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; + assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? 
|(L & FirstMatch) : |Active; - assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|X; - assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|W; - assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|R; + assign PMPInstrAccessFaultF = EnforcePMP & ExecuteAccessF & ~|(X & FirstMatch) ; + assign PMPStoreAmoAccessFaultM = EnforcePMP & WriteAccessM & ~|(W & FirstMatch) ; + assign PMPLoadAccessFaultM = EnforcePMP & ReadAccessM & ~|(R & FirstMatch) ; end else begin: pmpchecker // no checker assign PMPInstrAccessFaultF = 0; assign PMPLoadAccessFaultM = 0; diff --git a/pipelined/src/uncore/ahbapbbridge.sv b/pipelined/src/uncore/ahbapbbridge.sv index e05ee3d82..8ebd1d308 100644 --- a/pipelined/src/uncore/ahbapbbridge.sv +++ b/pipelined/src/uncore/ahbapbbridge.sv @@ -34,9 +34,11 @@ module ahbapbbridge #(PERIPHS = 2) ( input logic [PERIPHS-1:0] HSEL, input logic [31:0] HADDR, input logic [`XLEN-1:0] HWDATA, + input logic [`XLEN/8-1:0] HWSTRB, input logic HWRITE, input logic [1:0] HTRANS, input logic HREADY, +// input logic [3:0] HPROT, // not used output logic [`XLEN-1:0] HRDATA, output logic HRESP, HREADYOUT, output logic PCLK, PRESETn, @@ -45,30 +47,37 @@ module ahbapbbridge #(PERIPHS = 2) ( output logic PENABLE, output logic [31:0] PADDR, output logic [`XLEN-1:0] PWDATA, +// output logic [2:0] PPROT, // not used + output logic [`XLEN/8-1:0] PSTRB, +// output logic PWAKEUP // not used input logic [PERIPHS-1:0] PREADY, - input var [`XLEN-1:0][PERIPHS-1:0] PRDATA + input var [PERIPHS-1:0][`XLEN-1:0] PRDATA ); - logic activeTrans; + logic initTrans, initTransSel, initTransSelD; logic nextPENABLE; + logic PREADYOUT; // convert AHB to APB signals assign PCLK = HCLK; assign PRESETn = HRESETn; // identify start of a transaction - assign activeTrans = (HTRANS == 2'b10); // only accept nonsequential transactions - assign initTrans = activeTrans & HREADY; // start a transaction when the bus is ready and an active transaction is requested + assign initTrans = 
HTRANS[1] & HREADY; // start a transaction when the bus is ready and an active transaction is requested assign initTransSel = initTrans & |HSEL; // capture data and address if any of the peripherals are selected // delay AHB Address phase signals to align with AHB Data phase because APB expects them at the same time - flopenr #(32) addrreg(HCLK, ~HRESETn, initTransSel, HADDR, PADDR); - flopenr #(1) writereg(HCLK, ~HRESETn, initTransSel, HWRITE, PWRITE); - // enable selreg with iniTrans rather than initTransSel so PSEL can turn off - flopenr #(PERIPHS) selreg(HCLK, ~HRESETn, initTrans, HSEL & {PERIPHS{activeTrans}}, PSEL); - // AHB Data phase signal doesn't need delay. Note that HWDATA is guaranteed to remain stable until READY is asserted + flopen #(32) addrreg(HCLK, HREADY, HADDR, PADDR); + flopenr #(1) writereg(HCLK, ~HRESETn, HREADY, HWRITE, PWRITE); + flopenr #(PERIPHS) selreg(HCLK, ~HRESETn, HREADY, HSEL & {PERIPHS{initTrans}}, PSEL); + // PPROT[2:0] = {Data/InstrB, Secure, Privileged}; + // assign PPROT = {~HPROT[0], 1'b0, HPROT[1]}; // protection not presently used + // assign PWAKEUP = 1'b1; // not used + + // AHB Data phase signal doesn't need delay. Note that they are guaranteed to remain stable until READY is asserted assign PWDATA = HWDATA; + assign PSTRB = HWSTRB; // enable logic: goes high a cycle after initTrans, then back low on cycle after desired PREADY is asserted // cycle1: AHB puts HADDR, HWRITE, HSEL on bus. 
initTrans is 1, and these are captured @@ -81,16 +90,19 @@ module ahbapbbridge #(PERIPHS = 2) ( // result and ready multiplexer int i; - always_comb + always_comb begin + // default: no peripheral selected: read 0, indicate ready during access phase so bus doesn't hang + // *** also could assert ready right away + HRDATA = 0; + PREADYOUT = 1'b1; for (i=0; i1 cycle to respond + // word aligned reads - if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; - else assign #2 entry = {HADDR[15:2], 2'b00}; + if (`XLEN==64) assign #2 entry = {PADDR[15:3], 3'b000}; + else assign #2 entry = {PADDR[15:2], 2'b00}; - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(entryd[2:0]), .ByteMask(ByteMaskM)); - // DH 2/20/21: Eventually allow MTIME to run off a separate clock // This will require synchronizing MTIME to the system clock // before it is read or compared to MTIMECMP. @@ -76,83 +66,83 @@ module clint ( // register access if (`XLEN==64) begin:clint // 64-bit - always @(posedge HCLK) begin + always @(posedge PCLK) begin case(entry) - 16'h0000: HREADCLINT <= {63'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP; - 16'hBFF8: HREADCLINT <= MTIME; - default: HREADCLINT <= 0; + 16'h0000: PRDATA <= {63'b0, MSIP}; + 16'h4000: PRDATA <= MTIMECMP; + 16'hBFF8: PRDATA <= MTIME; + default: PRDATA <= 0; endcase end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK or negedge PRESETn) + if (~PRESETn) begin MSIP <= 0; MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) begin + if (entry == 16'h0000) MSIP <= PWDATA[0]; + if (entry == 16'h4000) begin for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIMECMP[i*8 +: 8] <= HWDATA[i*8 +: 8]; // ***dh: this notation isn't in book yet - maybe from Ross + if(PSTRB[i]) + MTIMECMP[i*8 +: 8] <= PWDATA[i*8 +: 8]; // ***dh: this 
notation isn't in book yet - maybe from Ross end end // eventually replace MTIME logic below with timereg -// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), 1'b0, HWDATA, MTIME, done); +// timereg tr(PCLK, PRESETn, TIMECLK, memwrite & (entry==16'hBFF8), 1'b0, PWDATA, MTIME, done); - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK or negedge PRESETn) + if (~PRESETn) begin MTIME <= 0; - end else if (memwrite & entryd == 16'hBFF8) begin + end else if (memwrite & entry == 16'hBFF8) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIME[j*8 +: 8] <= HWDATA[j*8 +: 8]; + if(PSTRB[j]) + MTIME[j*8 +: 8] <= PWDATA[j*8 +: 8]; end else MTIME <= MTIME + 1; end else begin:clint // 32-bit - always @(posedge HCLK) begin + always @(posedge PCLK) begin case(entry) - 16'h0000: HREADCLINT <= {31'b0, MSIP}; - 16'h4000: HREADCLINT <= MTIMECMP[31:0]; - 16'h4004: HREADCLINT <= MTIMECMP[63:32]; - 16'hBFF8: HREADCLINT <= MTIME[31:0]; - 16'hBFFC: HREADCLINT <= MTIME[63:32]; - default: HREADCLINT <= 0; + 16'h0000: PRDATA <= {31'b0, MSIP}; + 16'h4000: PRDATA <= MTIMECMP[31:0]; + 16'h4004: PRDATA <= MTIMECMP[63:32]; + 16'hBFF8: PRDATA <= MTIME[31:0]; + 16'hBFFC: PRDATA <= MTIME[63:32]; + default: PRDATA <= 0; endcase end - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK or negedge PRESETn) + if (~PRESETn) begin MSIP <= 0; MTIMECMP <= 0; // MTIMECMP is not reset ***? 
end else if (memwrite) begin - if (entryd == 16'h0000) MSIP <= HWDATA[0]; - if (entryd == 16'h4000) + if (entry == 16'h0000) MSIP <= PWDATA[0]; + if (entry == 16'h4000) for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIMECMP[j*8 +: 8] <= HWDATA[j*8 +: 8]; - if (entryd == 16'h4004) + if(PSTRB[j]) + MTIMECMP[j*8 +: 8] <= PWDATA[j*8 +: 8]; + if (entry == 16'h4004) for(j=0;j<`XLEN/8;j++) - if(ByteMaskM[j]) - MTIMECMP[32 + j*8 +: 8] <= HWDATA[j*8 +: 8]; + if(PSTRB[j]) + MTIMECMP[32 + j*8 +: 8] <= PWDATA[j*8 +: 8]; // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed end // eventually replace MTIME logic below with timereg -// timereg tr(HCLK, HRESETn, TIMECLK, memwrite & (entryd==16'hBFF8), memwrite & (entryd == 16'hBFFC), HWDATA, MTIME, done); - always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) begin +// timereg tr(PCLK, PRESETn, TIMECLK, memwrite & (entry==16'hBFF8), memwrite & (entry == 16'hBFFC), PWDATA, MTIME, done); + always_ff @(posedge PCLK or negedge PRESETn) + if (~PRESETn) begin MTIME <= 0; // MTIMECMP is not reset - end else if (memwrite & (entryd == 16'hBFF8)) begin + end else if (memwrite & (entry == 16'hBFF8)) begin for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIME[i*8 +: 8] <= HWDATA[i*8 +: 8]; - end else if (memwrite & (entryd == 16'hBFFC)) begin + if(PSTRB[i]) + MTIME[i*8 +: 8] <= PWDATA[i*8 +: 8]; + end else if (memwrite & (entry == 16'hBFFC)) begin // MTIME Counter. Eventually change this to run off separate clock. 
Synchronization then needed for(i=0;i<`XLEN/8;i++) - if(ByteMaskM[i]) - MTIME[32 + i*8 +: 8]<= HWDATA[i*8 +: 8]; + if(PSTRB[i]) + MTIME[32 + i*8 +: 8]<= PWDATA[i*8 +: 8]; end else MTIME <= MTIME + 1; end @@ -183,18 +173,18 @@ module timeregsync( endmodule module timereg( - input logic HCLK, HRESETn, TIMECLK, + input logic PCLK, PRESETn, TIMECLK, input logic we0, we1, - input logic [`XLEN-1:0] HWDATA, + input logic [`XLEN-1:0] PWDATA, output logic [63:0] MTIME, output logic done); -// if (`TIMEBASE_SYNC) begin:timereg // use HCLK for MTIME - if (1) begin:timereg // use HCLK for MTIME - timregsync timeregsync(.clk(HCLK), .resetn(HRESETn), .we0, .we1, .wd(HWDATA), .q(MTIME)); +// if (`TIMEBASE_SYNC) begin:timereg // use PCLK for MTIME + if (1) begin:timereg // use PCLK for MTIME + timregsync timeregsync(.clk(PCLK), .resetn(PRESETn), .we0, .we1, .wd(PWDATA), .q(MTIME)); assign done = 1; // immediately completes end else begin // use asynchronous TIMECLK - // TIME counter runs on TIMECLK but bus interface runs on HCLK + // TIME counter runs on TIMECLK but bus interface runs on PCLK // Need to synchronize reads and writes // This is subtle because synchronizing a binary counter on a per-bit basis could give a mix of old and new bits // Instead, we use a Gray coded counter that only changes one bit per cycle @@ -210,29 +200,29 @@ module timereg( // When a write enable is asserted for a cycle, sample the enables and data and raise a request until it is acknowledged // When the acknowledge falls, the transaction is done and the system is ready for another write. // ***look at redoing this assuming write enable and data are held rather than pulsed. 
- always_ff @(posedge HCLK or negedge HRESETn) - if (~HRESETn) + always_ff @(posedge PCLK or negedge PRESETn) + if (~PRESETn) req <= 0; // don't bother resetting wd else begin req <= we0 | we1 | req & ~ack; we0_stored <= we0; we1_stored <= we1; - wd_stored <= HWDATA; + wd_stored <= PWDATA; ack_stored <= ack; done <= ack_stored & ~ack; end // synchronize the reset and reqest into the TIMECLK domain - sync resetsync(TIMECLK, HRESETn, resetn_sync); + sync resetsync(TIMECLK, PRESETn, resetn_sync); sync rsync(TIMECLK, req, req_sync); - // synchronize the acknowledge back to the HCLK domain to indicate the request was handled and can be lowered - sync async(HCLK, req_sync, ack); + // synchronize the acknowledge back to the PCLK domain to indicate the request was handled and can be lowered + sync async(PCLK, req_sync, ack); timeregsync timeregsync(.clk(TIMECLK), .resetn(resetn_sync), .we0(we0_stored), .we1(we1_stored), .wd(wd_stored), .q(time_int)); binarytogray b2g(time_int, time_int_gc); flop gcreg(TIMECLK, time_int_gc, time_gc); - sync timesync[63:0](HCLK, time_gc, MTIME_GC); + sync timesync[63:0](PCLK, time_gc, MTIME_GC); graytobinary g2b(MTIME_GC, MTIME); end endmodule diff --git a/pipelined/src/uncore/gpio.sv b/pipelined/src/uncore/gpio.sv deleted file mode 100644 index 30ef0e31a..000000000 --- a/pipelined/src/uncore/gpio.sv +++ /dev/null @@ -1,161 +0,0 @@ -/////////////////////////////////////////// -// gpio.sv -// -// Written: David_Harris@hmc.edu 14 January 2021 -// Modified: bbracker@hmc.edu 15 Apr. 2021 -// -// Purpose: General Purpose I/O peripheral -// See FE310-G002-Manual-v19p05 for specifications -// No interrupts, drive strength, or pull-ups supported -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module gpio ( - input logic HCLK, HRESETn, - input logic HSELGPIO, - input logic [7:0] HADDR, - input logic [`XLEN-1:0] HWDATA, - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - output logic [`XLEN-1:0] HREADGPIO, - output logic HRESPGPIO, HREADYGPIO, - input logic [31:0] GPIOPinsIn, - output logic [31:0] GPIOPinsOut, GPIOPinsEn, - output logic GPIOIntr); - - logic [31:0] input0d, input1d, input2d, input3d; - logic [31:0] input_val, input_en, output_en, output_val; - logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor; - - logic initTrans, memwrite; - logic [7:0] entry, entryd; - logic [31:0] Din, Dout; - - // AHB I/O - assign entry = {HADDR[7:2],2'b0}; - assign initTrans = HREADY & HSELGPIO & (HTRANS != 2'b00); - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(HCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(8) entrydflop(HCLK, ~HRESETn, entry, entryd); - assign HRESPGPIO = 0; // OK - assign HREADYGPIO = 1'b1; // GPIO never takes >1 cycle to respond - - // account for subword read/write circuitry - // -- Note GPIO registers are 32 bits no matter what; access them with LW SW. - // (At least that's what I think when FE310 spec says "only naturally aligned 32-bit accesses are supported") - if (`XLEN == 64) begin - assign Din = entryd[2] ? HWDATA[63:32] : HWDATA[31:0]; - assign HREADGPIO = entryd[2] ? 
{Dout,32'b0} : {32'b0,Dout}; - end else begin // 32-bit - assign Din = HWDATA[31:0]; - assign HREADGPIO = Dout; - end - - // register access - always_ff @(posedge HCLK, negedge HRESETn) begin - // writes - if (~HRESETn) begin - // asynch reset - input_en <= 0; - output_en <= 0; - // *** synch reset not yet implemented - output_val <= #1 0; - rise_ie <= #1 0; - rise_ip <= #1 0; - fall_ie <= #1 0; - fall_ip <= #1 0; - high_ie <= #1 0; - high_ip <= #1 0; - low_ie <= #1 0; - low_ip <= #1 0; - out_xor <= #1 0; - end else begin - // writes - if (memwrite) - // According to FE310 spec: Once the interrupt is pending, it will remain set until a 1 is written to the *_ip register at that bit. - /* verilator lint_off CASEINCOMPLETE */ - case(entryd) - 8'h04: input_en <= #1 Din; - 8'h08: output_en <= #1 Din; - 8'h0C: output_val <= #1 Din; - 8'h18: rise_ie <= #1 Din; - 8'h20: fall_ie <= #1 Din; - 8'h28: high_ie <= #1 Din; - 8'h30: low_ie <= #1 Din; - 8'h40: out_xor <= #1 Din; - endcase - /* verilator lint_on CASEINCOMPLETE */ - // reads - case(entry) - 8'h00: Dout <= #1 input_val; - 8'h04: Dout <= #1 input_en; - 8'h08: Dout <= #1 output_en; - 8'h0C: Dout <= #1 output_val; - 8'h18: Dout <= #1 rise_ie; - 8'h1C: Dout <= #1 rise_ip; - 8'h20: Dout <= #1 fall_ie; - 8'h24: Dout <= #1 fall_ip; - 8'h28: Dout <= #1 high_ie; - 8'h2C: Dout <= #1 high_ip; - 8'h30: Dout <= #1 low_ie; - 8'h34: Dout <= #1 low_ip; - 8'h40: Dout <= #1 out_xor; - default: Dout <= #1 0; - endcase - // interrupts - if (memwrite & (entryd == 8'h1C)) - rise_ip <= rise_ip & ~Din; - else - rise_ip <= rise_ip | (input2d & ~input3d); - if (memwrite & (entryd == 8'h24)) - fall_ip <= fall_ip & ~Din; - else - fall_ip <= fall_ip | (~input2d & input3d); - if (memwrite & (entryd == 8'h2C)) - high_ip <= high_ip & ~Din; - else - high_ip <= high_ip | input3d; - if (memwrite & (entryd == 8'h34)) - low_ip <= low_ip & ~Din; - else - low_ip <= low_ip | ~input3d; - end - end - - // chip i/o - // connect OUT to IN for loopback testing - 
if (`GPIO_LOOPBACK_TEST) assign input0d = ((output_en & GPIOPinsOut) | (~output_en & GPIOPinsIn)) & input_en; - else assign input0d = GPIOPinsIn & input_en; - flop #(32) sync1(HCLK,input0d,input1d); - flop #(32) sync2(HCLK,input1d,input2d); - flop #(32) sync3(HCLK,input2d,input3d); - assign input_val = input3d; - assign GPIOPinsOut = output_val ^ out_xor; - assign GPIOPinsEn = output_en; - - assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)}; -endmodule - diff --git a/pipelined/src/uncore/gpio_apb.sv b/pipelined/src/uncore/gpio_apb.sv index e5d681151..e0d4f01d6 100644 --- a/pipelined/src/uncore/gpio_apb.sv +++ b/pipelined/src/uncore/gpio_apb.sv @@ -37,10 +37,12 @@ module gpio_apb ( input logic PSEL, input logic [7:0] PADDR, input logic [`XLEN-1:0] PWDATA, + input logic [`XLEN/8-1:0] PSTRB, input logic PWRITE, input logic PENABLE, output logic [`XLEN-1:0] PRDATA, output logic PREADY, + input logic [31:0] iof0, iof1, input logic [31:0] GPIOPinsIn, output logic [31:0] GPIOPinsOut, GPIOPinsEn, output logic GPIOIntr); @@ -48,6 +50,7 @@ module gpio_apb ( logic [31:0] input0d, input1d, input2d, input3d; logic [31:0] input_val, input_en, output_en, output_val; logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip; + logic [31:0] out_xor, iof_en, iof_sel, iof_out, gpio_out; logic [7:0] entry; logic [31:0] Din, Dout; @@ -55,8 +58,8 @@ module gpio_apb ( // APB I/O assign entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses - assign memwrite = PWRITE & PENABLE; // only write in access phase - assign PREADY = PENABLE; // GPIO never takes >1 cycle to respond + assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase + assign PREADY = 1'b1; // GPIO never takes >1 cycle to respond // account for subword read/write circuitry // -- Note GPIO registers are 32 bits no matter what; access them with LW SW. 
@@ -84,6 +87,9 @@ module gpio_apb ( high_ip <= #1 0; low_ie <= #1 0; low_ip <= #1 0; + iof_en <= #1 0; + iof_sel <= #1 0; + out_xor <= #1 0; end else begin // writes // According to FE310 spec: Once the interrupt is pending, it will remain set until a 1 is written to the *_ip register at that bit. /* verilator lint_off CASEINCOMPLETE */ @@ -96,7 +102,9 @@ module gpio_apb ( 8'h20: fall_ie <= #1 Din; 8'h28: high_ie <= #1 Din; 8'h30: low_ie <= #1 Din; - 8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR + 8'h38: iof_en <= #1 Din; + 8'h3C: iof_sel <= #1 Din; + 8'h40: out_xor <= #1 Din; endcase /* verilator lint_on CASEINCOMPLETE */ @@ -123,7 +131,9 @@ module gpio_apb ( 8'h2C: Dout <= #1 high_ip; 8'h30: Dout <= #1 low_ie; 8'h34: Dout <= #1 low_ip; - 8'h40: Dout <= #1 0; // OUT_XOR reads as 0 + 8'h38: Dout <= #1 iof_en; + 8'h3C: Dout <= #1 iof_sel; + 8'h40: Dout <= #1 out_xor; default: Dout <= #1 0; endcase end @@ -138,7 +148,9 @@ module gpio_apb ( flop #(32) sync2(PCLK,input1d,input2d); flop #(32) sync3(PCLK,input2d,input3d); assign input_val = input3d; - assign GPIOPinsOut = output_val; + assign iof_out = iof_sel & iof1 | ~iof_sel & iof0; // per-bit mux between iof1 and iof0 + assign gpio_out = iof_en & iof_out | ~iof_en & output_val; // per-bit mux between IOF and output_val + assign GPIOPinsOut = gpio_out ^ out_xor; // per-bit flip output polarity assign GPIOPinsEn = output_en; assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)}; diff --git a/pipelined/src/uncore/plic.sv b/pipelined/src/uncore/plic_apb.sv similarity index 86% rename from pipelined/src/uncore/plic.sv rename to pipelined/src/uncore/plic_apb.sv index 9291358ae..51e94d7f4 100644 --- a/pipelined/src/uncore/plic.sv +++ b/pipelined/src/uncore/plic_apb.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// plic.sv +// plic_apb.sv // // Written: bbracker@hmc.edu 18 January 2021 // Modified: @@ -46,21 +46,21 @@ // number of conexts // hardcoded to 2 
contexts for now; *** later upgrade to arbitrary (up to 15872) contexts -module plic ( - input logic HCLK, HRESETn, - input logic HSELPLIC, - input logic [27:0] HADDR, // *** could factor out entryd into HADDRd at the level of uncore - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - input logic [`XLEN-1:0] HWDATA, +module plic_apb ( + input logic PCLK, PRESETn, + input logic PSEL, + input logic [27:0] PADDR, + input logic [`XLEN-1:0] PWDATA, + input logic [`XLEN/8-1:0] PSTRB, + input logic PWRITE, + input logic PENABLE, + output logic [`XLEN-1:0] PRDATA, + output logic PREADY, input logic UARTIntr,GPIOIntr, - output logic [`XLEN-1:0] HREADPLIC, - output logic HRESPPLIC, HREADYPLIC, (* mark_debug = "true" *) output logic MExtInt, SExtInt); logic memwrite, memread, initTrans; - logic [23:0] entry, entryd; + logic [23:0] entry; logic [31:0] Din, Dout; // context-independent signals @@ -81,31 +81,28 @@ module plic ( // ======= // AHB I/O // ======= - assign entry = {HADDR[23:2],2'b0}; - assign initTrans = HREADY & HSELPLIC & (HTRANS != 2'b00); - assign memread = initTrans & ~HWRITE; - // entryd and memwrite are delayed by a cycle because AHB controller waits a cycle before outputting write data - flopr #(1) memwriteflop(HCLK, ~HRESETn, initTrans & HWRITE, memwrite); - flopr #(24) entrydflop(HCLK, ~HRESETn, entry, entryd); - assign HRESPPLIC = 0; // OK - assign HREADYPLIC = 1'b1; // PLIC never takes >1 cycle to respond + + assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase + assign memread = ~PWRITE & PSEL; // read at start of access phase. PENABLE hasn't set up before this + assign PREADY = 1'b1; // PLIC never takes >1 cycle to respond + assign entry = {PADDR[23:2],2'b0}; // account for subword read/write circuitry // -- Note PLIC registers are 32 bits no matter what; access them with LW SW. if (`XLEN == 64) begin - assign Din = entryd[2] ? HWDATA[63:32] : HWDATA[31:0]; - assign HREADPLIC = entryd[2] ? 
{Dout,32'b0} : {32'b0,Dout}; + assign Din = entry[2] ? PWDATA[63:32] : PWDATA[31:0]; + assign PRDATA = entry[2] ? {Dout,32'b0} : {32'b0,Dout}; end else begin // 32-bit - assign HREADPLIC = Dout; - assign Din = HWDATA[31:0]; + assign PRDATA = Dout; + assign Din = PWDATA[31:0]; end // ================== // Register Interface // ================== - always @(posedge HCLK,negedge HRESETn) begin + always @(posedge PCLK,negedge PRESETn) begin // resetting - if (~HRESETn) begin + if (~PRESETn) begin intPriority <= #1 {`N{3'b0}}; intEn <= #1 {2{`N'b0}}; intThreshold <= #1 {2{3'b0}}; @@ -113,8 +110,8 @@ module plic ( // writing end else begin if (memwrite) - casez(entryd) - 24'h0000??: intPriority[entryd[7:2]] <= #1 Din[2:0]; + casez(entry) + 24'h0000??: intPriority[entry[7:2]] <= #1 Din[2:0]; `ifdef PLIC_NUM_SRC_LT_32 // *** switch to a generate for loop so as to deprecate PLIC_NUM_SRC_LT_32 and allow up to 1023 sources 24'h002000: intEn[0][`N:1] <= #1 Din[`N:1]; 24'h002080: intEn[1][`N:1] <= #1 Din[`N:1]; @@ -130,7 +127,7 @@ module plic ( 24'h201000: intThreshold[1] <= #1 Din[2:0]; 24'h201004: intInProgress <= #1 intInProgress & ~(`N'b1 << (Din[5:0]-1)); // lower "InProgress" to signify completion endcase - // reading + // Read synchronously because a read can have side effect of changing intInProgress if (memread) casez(entry) 24'h0000??: Dout <= #1 {29'b0,intPriority[entry[7:2]]}; @@ -159,9 +156,8 @@ module plic ( end default: Dout <= #1 32'h0; // invalid access endcase - else - Dout <= #1 32'h0; - end + else Dout <= #1 32'h0; + end end // connect sources to requests @@ -176,9 +172,9 @@ module plic ( end // pending interrupt requests - //assign nextIntPending = (intPending | requests) & ~intInProgress; - assign nextIntPending = requests; - flopr #(`N) intPendingFlop(HCLK,~HRESETn,nextIntPending,intPending); + assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully. 
+ //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion + flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending); // context-dependent signals genvar ctx; diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index 7ef023fe6..b850321e8 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -38,7 +38,7 @@ module ram #(parameter BASE=0, RANGE = 65535) ( input logic HREADY, input logic [1:0] HTRANS, input logic [`XLEN-1:0] HWDATA, - input logic [3:0] HSIZED, + input logic [`XLEN/8-1:0] HWSTRB, output logic [`XLEN-1:0] HREADRam, output logic HRESPRam, HREADYRam ); @@ -69,13 +69,8 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr); - // Byte mask for subword writes - // ***the CLINT and other peripherals duplicate this hardware - // *** it shoudl be centralized and sent over HWSTRB - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask)); - // single-ported RAM bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH) - memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); + memory(.clk(HCLK), .we(memwriteD), .bwe(HWSTRB), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA)); endmodule diff --git a/pipelined/src/uncore/ram_orig.sv b/pipelined/src/uncore/ram_orig.sv deleted file mode 100644 index bc852cf08..000000000 --- a/pipelined/src/uncore/ram_orig.sv +++ /dev/null @@ -1,107 +0,0 @@ -/////////////////////////////////////////// -// ram_orig.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: On-chip RAM, external to core -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module ram_orig #(parameter BASE=0, RANGE = 65535) ( - input logic HCLK, HRESETn, - input logic HSELRam, - input logic [31:0] HADDR, - input logic HWRITE, - input logic HREADY, - input logic [1:0] HTRANS, - input logic [`XLEN-1:0] HWDATA, - input logic [3:0] HSIZED, - output logic [`XLEN-1:0] HREADRam, - output logic HRESPRam, HREADYRam -); - - // Desired changes. - // 1. find a way to merge read and write address into 1 port. - // 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.) - // 3. implement burst. - // 4. remove the configurable latency. 
- - logic [`XLEN/8-1:0] ByteMaskM; - logic [31:0] HWADDR, A; - logic prevHREADYRam, risingHREADYRam; - logic initTrans; - logic memwrite; - logic [3:0] busycount; - - swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HWADDR[2:0]), .ByteMask(ByteMaskM)); - - assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00); - - // *** this seems like a weird way to use reset - flopenr #(1) memwritereg(HCLK, 1'b0, initTrans | ~HRESETn, HSELRam & HWRITE, memwrite); - flopenr #(32) haddrreg(HCLK, 1'b0, initTrans | ~HRESETn, HADDR, A); - - // busy FSM to extend READY signal - always @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin - busycount <= 0; - HREADYRam <= #1 0; - end else begin - if (initTrans) begin - busycount <= 0; - HREADYRam <= #1 0; - end else if (~HREADYRam) begin - if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2 - HREADYRam <= #1 1; - end else begin - busycount <= busycount + 1; - end - end - end - assign HRESPRam = 0; // OK - - localparam ADDR_WDITH = $clog2(RANGE/8); - localparam OFFSET = $clog2(`XLEN/8); - - // Rising HREADY edge detector - // Indicates when ram is finishing up - // Needed because HREADY may go high for other reasons, - // and we only want to write data when finishing up. 
- flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam); - assign risingHREADYRam = HREADYRam & ~prevHREADYRam; - - always @(posedge HCLK) - HWADDR <= #1 A; - - bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA) - memory(.clk(HCLK), .reA(1'b1), - .addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam), - .weB(memwrite & risingHREADYRam), .bweB(ByteMaskM), - .addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); - - -endmodule - diff --git a/pipelined/src/uncore/sdc/SDC.sv b/pipelined/src/uncore/sdc/SDC.sv index 2aa548d87..0c34e1843 100644 --- a/pipelined/src/uncore/sdc/SDC.sv +++ b/pipelined/src/uncore/sdc/SDC.sv @@ -144,7 +144,7 @@ module SDC // currently does not support writes - assign InitTrans = HREADY & HSELSDC & (HTRANS != 2'b00); + assign InitTrans = HREADY & HSELSDC & HTRANS[1]; //assign RegRead = InitTrans & ~HWRITE; // register resolve combo loop flopr #(1) RegReadReg(HCLK, ~HRESETn, InitTrans & ~HWRITE, RegRead); diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index ca9481fa3..524a63454 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -40,7 +40,7 @@ module uartPC16550D( // Processor Interface - input logic HCLK, HRESETn, + input logic PCLK, PRESETn, input logic [2:0] A, input logic [7:0] Din, output logic [7:0] Dout, @@ -132,7 +132,7 @@ module uartPC16550D( /////////////////////////////////////////// // Input synchronization: 2-stage synchronizer /////////////////////////////////////////// - always_ff @(posedge HCLK) begin + always_ff @(posedge PCLK) begin {SINd, DSRbd, DCDbd, CTSbd, RIbd} <= #1 {SIN, DSRb, DCDb, CTSb, RIb}; {SINsync, DSRbsync, DCDbsync, CTSbsync, RIbsync} <= #1 loop ? 
{SOUTbit, ~MCR[0], ~MCR[3], ~MCR[1], ~MCR[2]} : {SINd, DSRbd, DCDbd, CTSbd, RIbd}; // syncrhonized signals, handle loopback testing @@ -142,8 +142,8 @@ module uartPC16550D( /////////////////////////////////////////// // Register interface (Table 1, note some are read only and some write only) /////////////////////////////////////////// - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin // Table 3 Reset Configuration + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin // Table 3 Reset Configuration IER <= #1 4'b0; FCR <= #1 8'b0; if (`QEMU) LCR <= #1 8'b0; else LCR <= #1 8'b11; // fpga only **** BUG @@ -229,8 +229,8 @@ module uartPC16550D( /////////////////////////////////////////// // Ross Thompson: Found a bug. If the baud rate dividers DLM, and DLL are reloaded // the baudcount is not reset to {DLM, DLL, UART_PRESCALE} - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin baudcount <= #1 1; baudpulse <= #1 0; end else if (~MEMWb & DLAB & (A == 3'b0 | A == 3'b1)) begin @@ -254,8 +254,8 @@ module uartPC16550D( /////////////////////////////////////////// // receive timing and control /////////////////////////////////////////// - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin rxoversampledcnt <= #1 0; rxstate <= #1 UART_IDLE; rxbitsreceived <= #1 0; @@ -288,8 +288,8 @@ module uartPC16550D( /////////////////////////////////////////// // receive shift register, buffer register, FIFO /////////////////////////////////////////// - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) rxshiftreg <= #1 10'b0000000001; // initialize so that there is a valid stop bit + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) rxshiftreg <= #1 10'b0000000001; // initialize so that there is a valid stop bit else if (rxcentered) rxshiftreg <= #1 {rxshiftreg[8:0], SINsync}; // 
capture bit assign rxparitybit = rxshiftreg[1]; // parity, if it exists, in bit 1 when all done assign rxstopbit = rxshiftreg[0]; @@ -310,8 +310,8 @@ module uartPC16550D( assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time // receive FIFO and register - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin rxfifohead <= #1 0; rxfifotail <= #1 0; rxdataready <= #1 0; RXBR <= #1 0; end else begin if (rxstate == UART_DONE) begin @@ -367,8 +367,8 @@ module uartPC16550D( assign rxfifohaserr = |(RXerrbit & rxfullbit); // receive buffer register and ready bit - always_ff @(posedge HCLK, negedge HRESETn) // track rxrdy for DMA mode (FCR3 = FCR0 = 1) - if (~HRESETn) rxfifodmaready <= #1 0; + always_ff @(posedge PCLK, negedge PRESETn) // track rxrdy for DMA mode (FCR3 = FCR0 = 1) + if (~PRESETn) rxfifodmaready <= #1 0; else if (rxfifotriggered | rxfifotimeout) rxfifodmaready <= #1 1; else if (rxfifoempty) rxfifodmaready <= #1 0; @@ -386,8 +386,8 @@ module uartPC16550D( /////////////////////////////////////////// // transmit timing and control /////////////////////////////////////////// - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin txoversampledcnt <= #1 0; txstate <= #1 UART_IDLE; txbitssent <= #1 0; @@ -435,8 +435,8 @@ module uartPC16550D( end // registers & FIFO - always_ff @(posedge HCLK, negedge HRESETn) - if (~HRESETn) begin + always_ff @(posedge PCLK, negedge PRESETn) + if (~PRESETn) begin txfifohead <= #1 0; txfifotail <= #1 0; txhrfull <= #1 0; txsrfull <= #1 0; TXHR <= #1 0; txsr <= #1 12'hfff; end else begin if (~MEMWb & A == 3'b000 & ~DLAB) begin // writing transmit holding register or fifo @@ -477,8 +477,8 @@ module uartPC16550D( assign txfifofull = (txfifoentries == 4'b1111); // transmit buffer ready bit - always_ff @(posedge HCLK, negedge 
HRESETn) // track txrdy for DMA mode (FCR3 = FCR0 = 1) - if (~HRESETn) txfifodmaready <= #1 0; + always_ff @(posedge PCLK, negedge PRESETn) // track txrdy for DMA mode (FCR3 = FCR0 = 1) + if (~PRESETn) txfifodmaready <= #1 0; else if (txfifoempty) txfifodmaready <= #1 1; else if (txfifofull) txfifodmaready <= #1 0; @@ -514,18 +514,18 @@ module uartPC16550D( intrpending = 0; end end - always @(posedge HCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin + always @(posedge PCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin // Side effect of reading LSR is lowering overrun, parity, framing, break intr's assign setSquashRXerrIP = ~MEMRb & (A==3'b101); assign resetSquashRXerrIP = (rxstate == UART_DONE); assign squashRXerrIP = (prevSquashRXerrIP | setSquashRXerrIP) & ~resetSquashRXerrIP; - flopr #(1) squashRXerrIPreg(HCLK, ~HRESETn, squashRXerrIP, prevSquashRXerrIP); + flopr #(1) squashRXerrIPreg(PCLK, ~PRESETn, squashRXerrIP, prevSquashRXerrIP); // Side effect of reading IIR is lowering THRE_IP if most significant intr assign setSquashTHRE_IP = ~MEMRb & (A==3'b010) & (intrID==3'h1); // there's a 1-cycle delay on set squash so that THRE_IP doesn't change during the process of reading IIR (otherwise combinational loop) assign resetSquashTHRE_IP = ~THRE; assign squashTHRE_IP = prevSquashTHRE_IP & ~resetSquashTHRE_IP; - flopr #(1) squashTHRE_IPreg(HCLK, ~HRESETn, squashTHRE_IP | setSquashTHRE_IP, prevSquashTHRE_IP); + flopr #(1) squashTHRE_IPreg(PCLK, ~PRESETn, squashTHRE_IP | setSquashTHRE_IP, prevSquashTHRE_IP); /////////////////////////////////////////// // modem control logic diff --git a/pipelined/src/uncore/uart.sv b/pipelined/src/uncore/uart_apb.sv similarity index 71% rename from pipelined/src/uncore/uart.sv rename to pipelined/src/uncore/uart_apb.sv index dc620d734..6108b5807 100644 --- a/pipelined/src/uncore/uart.sv +++ b/pipelined/src/uncore/uart_apb.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// uart.sv +// 
uart_apb.sv // // Written: David_Harris@hmc.edu 21 January 2021 // Modified: @@ -32,23 +32,41 @@ `include "wally-config.vh" -module uart ( +module uart_apb ( + input logic PCLK, PRESETn, + input logic PSEL, + input logic [2:0] PADDR, + input logic [`XLEN-1:0] PWDATA, + input logic [`XLEN/8-1:0] PSTRB, + input logic PWRITE, + input logic PENABLE, + output logic [`XLEN-1:0] PRDATA, + output logic PREADY, +/* input logic HCLK, HRESETn, input logic HSELUART, input logic [2:0] HADDR, input logic HWRITE, - input logic [`XLEN-1:0] HWDATA, + input logic [`XLEN-1:0] PWDATA, output logic [`XLEN-1:0] HREADUART, - output logic HRESPUART, HREADYUART, + output logic HRESPUART, HREADYUART, */ (* mark_debug = "true" *) input logic SIN, DSRb, DCDb, CTSb, RIb, // from E1A driver from RS232 interface (* mark_debug = "true" *) output logic SOUT, RTSb, DTRb, // to E1A driver to RS232 interface (* mark_debug = "true" *) output logic OUT1b, OUT2b, INTR, TXRDYb, RXRDYb); // to CPU // UART interface signals - logic [2:0] A; + logic [2:0] entry; logic MEMRb, MEMWb, memread, memwrite; logic [7:0] Din, Dout; + assign memwrite = PWRITE & PENABLE & PSEL; // only write in access phase + assign memread = ~PWRITE & PENABLE & PSEL; + assign PREADY = 1'b1; // UART never takes >1 cycle to respond + assign entry = PADDR[2:0]; + assign MEMRb = ~memread; + assign MEMWb = ~memwrite; + +/* // rename processor interface signals to match PC16550D and provide one-byte interface flopr #(1) memreadreg(HCLK, ~HRESETn, (HSELUART & ~HWRITE), memread); flopr #(1) memwritereg(HCLK, ~HRESETn, (HSELUART & HWRITE), memwrite); @@ -58,29 +76,29 @@ module uart ( assign HRESPUART = 0; // OK assign HREADYUART = 1; // should idle high during address phase and respond high when done; will need to be modified if UART ever needs more than 1 cycle to do something - +*/ if (`XLEN == 64) begin:uart always_comb begin - HREADUART = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout}; - case (A) - 3'b000: Din = HWDATA[7:0]; - 3'b001: 
Din = HWDATA[15:8]; - 3'b010: Din = HWDATA[23:16]; - 3'b011: Din = HWDATA[31:24]; - 3'b100: Din = HWDATA[39:32]; - 3'b101: Din = HWDATA[47:40]; - 3'b110: Din = HWDATA[55:48]; - 3'b111: Din = HWDATA[63:56]; + PRDATA = {Dout, Dout, Dout, Dout, Dout, Dout, Dout, Dout}; + case (entry) + 3'b000: Din = PWDATA[7:0]; + 3'b001: Din = PWDATA[15:8]; + 3'b010: Din = PWDATA[23:16]; + 3'b011: Din = PWDATA[31:24]; + 3'b100: Din = PWDATA[39:32]; + 3'b101: Din = PWDATA[47:40]; + 3'b110: Din = PWDATA[55:48]; + 3'b111: Din = PWDATA[63:56]; endcase end end else begin:uart // 32-bit always_comb begin - HREADUART = {Dout, Dout, Dout, Dout}; - case (A[1:0]) - 2'b00: Din = HWDATA[7:0]; - 2'b01: Din = HWDATA[15:8]; - 2'b10: Din = HWDATA[23:16]; - 2'b11: Din = HWDATA[31:24]; + PRDATA = {Dout, Dout, Dout, Dout}; + case (entry[1:0]) + 2'b00: Din = PWDATA[7:0]; + 2'b01: Din = PWDATA[15:8]; + 2'b10: Din = PWDATA[23:16]; + 2'b11: Din = PWDATA[31:24]; endcase end end @@ -89,8 +107,8 @@ module uart ( // *** make sure reads don't occur on UART unless fully selected because they could change state. 
This applies to all peripherals uartPC16550D u( // Processor Interface - .HCLK, .HRESETn, - .A, .Din, + .PCLK, .PRESETn, + .A(entry), .Din, .Dout, .MEMRb, .MEMWb, .INTR, .TXRDYb, .RXRDYb, diff --git a/pipelined/src/uncore/uncore.sv b/pipelined/src/uncore/uncore.sv index a69b7cd0c..888489b9f 100644 --- a/pipelined/src/uncore/uncore.sv +++ b/pipelined/src/uncore/uncore.sv @@ -39,6 +39,7 @@ module uncore ( input logic TIMECLK, input logic [31:0] HADDR, input logic [`AHBW-1:0] HWDATA, + input logic [`XLEN/8-1:0] HWSTRB, input logic HWRITE, input logic [2:0] HSIZE, input logic [2:0] HBURST, @@ -52,7 +53,6 @@ module uncore ( output logic HSELEXT, // delayed signals input logic [2:0] HADDRD, - input logic [3:0] HSIZED, input logic HWRITED, // peripheral pins output logic MTimerInt, MSwInt, MExtInt, SExtInt, @@ -68,19 +68,28 @@ module uncore ( output logic [63:0] MTIME_CLINT ); - logic [`XLEN-1:0] HREADRam, HREADCLINT, HREADPLIC, HREADGPIO, HREADUART, HREADSDC; + logic [`XLEN-1:0] HREADRam, HREADSDC; logic [8:0] HSELRegions; logic HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSDC; logic HSELEXTD, HSELRamD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD, HSELSDCD; - logic HRESPRam, HRESPCLINT, HRESPPLIC, HRESPGPIO, HRESPUART, HRESPSDC; - logic HREADYRam, HREADYCLINT, HREADYPLIC, HREADYGPIO, HREADYUART, HRESPSDCD; + logic HRESPRam, HRESPSDC; + logic HREADYRam, HRESPSDCD; logic [`XLEN-1:0] HREADBootRom; logic HSELBootRom, HSELBootRomD, HRESPBootRom, HREADYBootRom, HREADYSDC; logic HSELNoneD; logic UARTIntr,GPIOIntr; logic SDCIntM; + logic PCLK, PRESETn, PWRITE, PENABLE; + logic [3:0] PSEL, PREADY; + logic [31:0] PADDR; + logic [`XLEN-1:0] PWDATA; + logic [`XLEN/8-1:0] PSTRB; + logic [3:0][`XLEN-1:0] PRDATA; + logic [`XLEN-1:0] HREADBRIDGE; + logic HRESPBRIDGE, HREADYBRIDGE, HSELBRIDGE, HSELBRIDGED; + // Determine which region of physical memory (if any) is being accessed // Use a trimmed down portion of the PMA checker - only the address decoders // Set access types to 
all 1 as don't cares because the MMU has already done access checking @@ -89,130 +98,111 @@ module uncore ( // unswizzle HSEL signals assign {HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[7:0]; -// generate - // on-chip RAM - if (`RAM_SUPPORTED) begin : ram - ram #( - .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( - .HCLK, .HRESETn, - .HSELRam, .HADDR, - .HWRITE, .HREADY, .HSIZED, - .HTRANS, .HWDATA, .HREADRam, - .HRESPRam, .HREADYRam); - end + // AHB -> APB bridge + ahbapbbridge #(4) ahbapbbridge + (.HCLK, .HRESETn, .HSEL({HSELUART, HSELPLIC, HSELCLINT, HSELGPIO}), .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HTRANS, .HREADY, + .HRDATA(HREADBRIDGE), .HRESP(HRESPBRIDGE), .HREADYOUT(HREADYBRIDGE), + .PCLK, .PRESETn, .PSEL, .PWRITE, .PENABLE, .PADDR, .PWDATA, .PSTRB, .PREADY, .PRDATA); + assign HSELBRIDGE = HSELGPIO | HSELCLINT | HSELPLIC | HSELUART; // if any of the bridge signals are selected + + // on-chip RAM + if (`RAM_SUPPORTED) begin : ram + ram #( + .BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram ( + .HCLK, .HRESETn, + .HSELRam, .HADDR, + .HWRITE, .HREADY, + .HTRANS, .HWDATA, .HWSTRB, .HREADRam, + .HRESPRam, .HREADYRam); + end - if (`BOOTROM_SUPPORTED) begin : bootrom - ram_orig #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) - bootrom( - .HCLK, .HRESETn, - .HSELRam(HSELBootRom), .HADDR, - .HWRITE, .HREADY, .HTRANS, .HSIZED, - .HWDATA, - .HREADRam(HREADBootRom), .HRESPRam(HRESPBootRom), .HREADYRam(HREADYBootRom)); - end + // *** switch to new RAM + if (`BOOTROM_SUPPORTED) begin : bootrom + ram #(.BASE(`BOOTROM_BASE), .RANGE(`BOOTROM_RANGE)) + bootrom( + .HCLK, .HRESETn, + .HSELRam(HSELBootRom), .HADDR, + .HWRITE, .HREADY, .HTRANS, + .HWDATA, .HWSTRB, + .HREADRam(HREADBootRom), .HRESPRam(HRESPBootRom), .HREADYRam(HREADYBootRom)); + end - // memory-mapped I/O peripherals - if (`CLINT_SUPPORTED == 1) begin : clint - clint clint( - .HCLK, .HRESETn, .TIMECLK, - .HSELCLINT, .HADDR(HADDR[15:0]), .HWRITE, - .HWDATA, .HREADY, .HTRANS, 
.HSIZED, - .HREADCLINT, - .HRESPCLINT, .HREADYCLINT, - .MTIME(MTIME_CLINT), - .MTimerInt, .MSwInt); + // memory-mapped I/O peripherals + if (`CLINT_SUPPORTED == 1) begin : clint + clint_apb clint( + .PCLK, .PRESETn, .PSEL(PSEL[1]), .PADDR(PADDR[15:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + .PRDATA(PRDATA[1]), .PREADY(PREADY[1]), + .MTIME(MTIME_CLINT), + .MTimerInt, .MSwInt); - end else begin : clint - assign MTIME_CLINT = 0; - assign MTimerInt = 0; assign MSwInt = 0; - end - if (`PLIC_SUPPORTED == 1) begin : plic - plic plic( - .HCLK, .HRESETn, - .HSELPLIC, .HADDR(HADDR[27:0]), - .HWRITE, .HREADY, .HTRANS, .HWDATA, - .UARTIntr, .GPIOIntr, - .HREADPLIC, .HRESPPLIC, .HREADYPLIC, - .MExtInt, .SExtInt); - end else begin : plic - assign MExtInt = 0; - assign SExtInt = 0; - end - if (`GPIO_SUPPORTED == 1) begin : gpio - gpio gpio( - .HCLK, .HRESETn, .HSELGPIO, - .HADDR(HADDR[7:0]), - .HWDATA, - .HWRITE, .HREADY, - .HTRANS, - .HREADGPIO, - .HRESPGPIO, .HREADYGPIO, - .GPIOPinsIn, - .GPIOPinsOut, .GPIOPinsEn, - .GPIOIntr); + end else begin : clint + assign MTIME_CLINT = 0; + assign MTimerInt = 0; assign MSwInt = 0; + end + if (`PLIC_SUPPORTED == 1) begin : plic + plic_apb plic( + .PCLK, .PRESETn, .PSEL(PSEL[2]), .PADDR(PADDR[27:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + .PRDATA(PRDATA[2]), .PREADY(PREADY[2]), + .UARTIntr, .GPIOIntr, + .MExtInt, .SExtInt); + end else begin : plic + assign MExtInt = 0; + assign SExtInt = 0; + end + if (`GPIO_SUPPORTED == 1) begin : gpio + gpio_apb gpio( + .PCLK, .PRESETn, .PSEL(PSEL[0]), .PADDR(PADDR[7:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + .PRDATA(PRDATA[0]), .PREADY(PREADY[0]), + .iof0(), .iof1(), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .GPIOIntr); + end else begin : gpio + assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0; + end + if (`UART_SUPPORTED == 1) begin : uart + uart_apb uart( + .PCLK, .PRESETn, .PSEL(PSEL[3]), .PADDR(PADDR[2:0]), .PWDATA, .PSTRB, .PWRITE, .PENABLE, + .PRDATA(PRDATA[3]), .PREADY(PREADY[3]), + 
.SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface + .SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface + .OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU + end else begin : uart + assign UARTSout = 0; assign UARTIntr = 0; + end + if (`SDC_SUPPORTED == 1) begin : sdc + SDC SDC(.HCLK, .HRESETn, .HSELSDC, .HADDR(HADDR[4:0]), .HWRITE, .HREADY, .HTRANS, + .HWDATA, .HREADSDC, .HRESPSDC, .HREADYSDC, + // sdc interface + .SDCCmdOut, .SDCCmdIn, .SDCCmdOE, .SDCDatIn, .SDCCLK, + // interrupt to PLIC + .SDCIntM + ); + end else begin : sdc + assign SDCCLK = 0; + assign SDCCmdOut = 0; + assign SDCCmdOE = 0; + end - end else begin : gpio - assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0; - end - if (`UART_SUPPORTED == 1) begin : uart - uart uart( - .HCLK, .HRESETn, - .HSELUART, - .HADDR(HADDR[2:0]), - .HWRITE, .HWDATA, - .HREADUART, .HRESPUART, .HREADYUART, - .SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface - .SOUT(UARTSout), .RTSb(), .DTRb(), // to E1A driver to RS232 interface - .OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb()); // to CPU - end else begin : uart - assign UARTSout = 0; assign UARTIntr = 0; - end - if (`SDC_SUPPORTED == 1) begin : sdc - SDC SDC(.HCLK, .HRESETn, .HSELSDC, .HADDR(HADDR[4:0]), .HWRITE, .HREADY, .HTRANS, - .HWDATA, .HREADSDC, .HRESPSDC, .HREADYSDC, - // sdc interface - .SDCCmdOut, .SDCCmdIn, .SDCCmdOE, .SDCDatIn, .SDCCLK, - // interrupt to PLIC - .SDCIntM - ); - end else begin : sdc - assign SDCCLK = 0; - assign SDCCmdOut = 0; - assign SDCCmdOE = 0; - end -// endgenerate - - // mux could also include external memory // AHB Read Multiplexer assign HRDATA = ({`XLEN{HSELRamD}} & HREADRam) | - ({`XLEN{HSELEXTD}} & HRDATAEXT) | - ({`XLEN{HSELCLINTD}} & HREADCLINT) | - ({`XLEN{HSELPLICD}} & HREADPLIC) | - ({`XLEN{HSELGPIOD}} & HREADGPIO) | + ({`XLEN{HSELEXTD}} & HRDATAEXT) | + 
({`XLEN{HSELBRIDGED}} & HREADBRIDGE) | ({`XLEN{HSELBootRomD}} & HREADBootRom) | - ({`XLEN{HSELUARTD}} & HREADUART) | ({`XLEN{HSELSDCD}} & HREADSDC); assign HRESP = HSELRamD & HRESPRam | HSELEXTD & HRESPEXT | - HSELCLINTD & HRESPCLINT | - HSELPLICD & HRESPPLIC | - HSELGPIOD & HRESPGPIO | + HSELBRIDGE & HRESPBRIDGE | HSELBootRomD & HRESPBootRom | - HSELUARTD & HRESPUART | HSELSDC & HRESPSDC; assign HREADY = HSELRamD & HREADYRam | HSELEXTD & HREADYEXT | - HSELCLINTD & HREADYCLINT | - HSELPLICD & HREADYPLIC | - HSELGPIOD & HREADYGPIO | + HSELBRIDGED & HREADYBRIDGE | HSELBootRomD & HREADYBootRom | - HSELUARTD & HREADYUART | HSELSDCD & HREADYSDC | HSELNoneD; // don't lock up the bus if no region is being accessed // Address Decoder Delay (figure 4-2 in spec) flopr #(9) hseldelayreg(HCLK, ~HRESETn, HSELRegions, {HSELNoneD, HSELEXTD, HSELBootRomD, HSELRamD, HSELCLINTD, HSELGPIOD, HSELUARTD, HSELPLICD, HSELSDCD}); + flopr #(1) hselbridgedelayreg(HCLK, ~HRESETn, HSELBRIDGE, HSELBRIDGED); endmodule diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 8ef8ec18b..372f4aba5 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -42,6 +42,7 @@ module wallypipelinedcore ( output logic HCLK, HRESETn, output logic [31:0] HADDR, output logic [`AHBW-1:0] HWDATA, + output logic [`XLEN/8-1:0] HWSTRB, output logic HWRITE, output logic [2:0] HSIZE, output logic [2:0] HBURST, @@ -92,7 +93,7 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; - logic FLoad2; + logic FStore2; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; @@ -115,6 +116,8 @@ module wallypipelinedcore ( logic [1:0] PageType; logic sfencevmaM, wfiM, IntPendingM; logic SelHPTW; + logic [`XLEN/8-1:0] ByteMaskM; + // PMA checker signals var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0]; @@ -256,13 +259,14 @@ module wallypipelinedcore 
( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM, - .FWriteDataM, .FLoad2, + .FWriteDataM, .FStore2, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, // connected to ahb (all stay the same) .LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit, .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete, + .ByteMaskM, // connect to csr or privilege and stay the same. .PrivilegeModeW, .BigEndianM, // connects to csr @@ -309,9 +313,10 @@ module wallypipelinedcore ( .LSUTransComplete, .LSUBusAck, .LSUBusInit, + .ByteMaskM, .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn, - .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, + .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HADDRD, .HSIZED, .HWRITED); @@ -395,7 +400,7 @@ module wallypipelinedcore ( .STATUS_FS, // is floating-point enabled? .FRegWriteM, // FP register write enable .FpLoadStoreM, - .FLoad2, + .FStore2, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory diff --git a/pipelined/src/wally/wallypipelinedsoc.sv b/pipelined/src/wally/wallypipelinedsoc.sv index 54e5b87d8..ff1d95005 100644 --- a/pipelined/src/wally/wallypipelinedsoc.sv +++ b/pipelined/src/wally/wallypipelinedsoc.sv @@ -48,6 +48,7 @@ module wallypipelinedsoc ( output logic HCLK, HRESETn, output logic [31:0] HADDR, output logic [`AHBW-1:0] HWDATA, + output logic [`XLEN/8-1:0] HWSTRB, output logic HWRITE, output logic [2:0] HSIZE, output logic [2:0] HBURST, @@ -79,6 +80,7 @@ module wallypipelinedsoc ( logic [3:0] HSIZED; logic HWRITED; + // synchronize reset to SOC clock domain synchronizer resetsync(.clk, .d(reset_ext), .q(reset)); @@ -86,14 +88,14 @@ module wallypipelinedsoc ( wallypipelinedcore core(.clk, .reset, .MTimerInt, .MExtInt, .SExtInt, .MSwInt, .MTIME_CLINT, - .HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn, .HADDR, .HWDATA, + .HRDATA, 
.HREADY, .HRESP, .HCLK, .HRESETn, .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HADDRD, .HSIZED, .HWRITED ); uncore uncore(.HCLK, .HRESETn, .TIMECLK, - .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT, - .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HADDRD, .HSIZED, .HWRITED, + .HADDR, .HWDATA, .HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT, + .HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HADDRD, .HWRITED, .MTimerInt, .MSwInt, .MExtInt, .SExtInt, .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .UARTSin, .UARTSout, .MTIME_CLINT, .HSELEXT, .SDCCmdOut, .SDCCmdOE, .SDCCmdIn, .SDCDatIn, .SDCCLK diff --git a/pipelined/src/wally/wallypipelinedsocwrapper.v b/pipelined/src/wally/wallypipelinedsocwrapper.v index d9c513404..2a25f476f 100644 --- a/pipelined/src/wally/wallypipelinedsocwrapper.v +++ b/pipelined/src/wally/wallypipelinedsocwrapper.v @@ -42,6 +42,7 @@ module wallypipelinedsocwrapper ( output HCLK, HRESETn, output [31:0] HADDR, output [`AHBW-1:0] HWDATA, + output logic [`XLEN/8-1:0] HWSTRB, output HWRITE, output [2:0] HSIZE, output [2:0] HBURST, diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index f2ed6f8ca..5d7898b17 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,7 +1,8 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm + ./sqrttestgen testgen: testgen.c gcc testgen.c -o testgen -lm @@ -28,5 +29,5 @@ inttestgen: inttestgen.c ./inttestgen clean: - rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index 61fe74aa4..d6bebb774 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -96,6 +96,10 @@ void output(FILE 
*fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, // Print r in standard double format fprintf(fptr, "%03x", rExp|(rSign<<11)); printhex(fptr, rFrac); + fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/inttestgen b/pipelined/srt/inttestgen new file mode 100755 index 000000000..2f7e68199 Binary files /dev/null and b/pipelined/srt/inttestgen differ diff --git a/pipelined/srt/inttestgen.c b/pipelined/srt/inttestgen.c new file mode 100644 index 000000000..17ec9299e --- /dev/null +++ b/pipelined/srt/inttestgen.c @@ -0,0 +1,83 @@ +/* testgen.c */ + +/* Written 10/31/96 by David Harris + + This program creates test vectors for mantissa component + of an IEEE floating point divider. + */ + +/* #includes */ + +#include +#include +#include + +/* Constants */ + +#define ENTRIES 10 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, long a, long b, long r, long rem); +void printhex(FILE *fptr, long x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + long a, b, r, rem; + long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255}; + int i, j; + + if ((fptr = fopen("inttestvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; i -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int 
bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t10'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -4) - printf("0000"); - else if ((pla.tot) >= -13) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 14) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -15) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 15) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -16) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 16) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -18) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 18) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - 
printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -22) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -24) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b deleted file mode 100755 index f719bbf47..000000000 Binary files a/pipelined/srt/qslc_r4a2b and /dev/null differ diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c deleted file mode 100644 index 94a3a4cd4..000000000 --- a/pipelined/srt/qslc_r4a2b.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. 
Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -4) - printf("0"); - else if ((pla.tot) >= -13) - printf("2"); - else - printf("1"); - break; - case 1: - if ((pla.tot) >= 14) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -15) - printf("2"); - else - printf("1"); - break; - case 2: - if ((pla.tot) >= 15) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -16) 
- printf("2"); - else - printf("1"); - break; - case 3: - if ((pla.tot) >= 16) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -18) - printf("2"); - else - printf("1"); - break; - case 4: - if ((pla.tot) >= 18) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 5: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 6: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -22) - printf("2"); - else - printf("1"); - break; - case 7: - if ((pla.tot) >= 24) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -24) - printf("2"); - else - printf("1"); - break; - default: printf ("X"); - - } - - printf("\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - -} diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv deleted file mode 100644 index b92d81e8e..000000000 --- a/pipelined/srt/qslc_r4a2b.tv +++ /dev/null @@ -1,1024 +0,0 @@ -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 
-2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 deleted file mode 100755 index 5cff70cdf..000000000 Binary files a/pipelined/srt/qslc_sqrt_r4a2 and /dev/null differ diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c deleted file mode 100644 index 252293cc0..000000000 --- a/pipelined/srt/qslc_sqrt_r4a2.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) 
-i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t11'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -26) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 28) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -10) - printf("0000"); - else if ((pla.tot) >= -28) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -32) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -34) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 36) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -36) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 40) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -40) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 40) - printf("1000"); - else 
if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -44) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 44) - printf("1000"); - else if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -46) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv deleted file mode 100644 index 805dbbaeb..000000000 --- a/pipelined/srt/qslc_sqrt_r4a2.sv +++ /dev/null @@ -1,1026 +0,0 @@ - case({D[5:3],Wmsbs}) - 11'b000_0000000: q = 4'b0000; - 11'b000_0000001: q = 4'b0000; - 11'b000_0000010: q = 4'b0000; - 11'b000_0000011: q = 4'b0000; - 11'b000_0000100: q = 4'b0000; - 11'b000_0000101: q = 4'b0000; - 11'b000_0000110: q = 4'b0000; - 11'b000_0000111: q = 4'b0000; - 11'b000_0001000: q = 4'b0100; - 11'b000_0001001: q = 4'b0100; - 11'b000_0001010: q = 4'b0100; - 11'b000_0001011: q = 4'b0100; - 11'b000_0001100: q = 4'b0100; - 11'b000_0001101: q = 4'b0100; - 11'b000_0001110: q = 4'b0100; - 11'b000_0001111: q = 4'b0100; - 11'b000_0010000: q = 4'b0100; - 11'b000_0010001: q = 4'b0100; - 11'b000_0010010: q = 4'b0100; - 11'b000_0010011: q = 4'b0100; - 11'b000_0010100: q = 4'b0100; - 11'b000_0010101: q = 4'b0100; - 11'b000_0010110: q = 4'b0100; - 11'b000_0010111: q = 4'b0100; - 11'b000_0011000: q = 4'b1000; - 11'b000_0011001: q = 4'b1000; - 11'b000_0011010: q = 4'b1000; - 11'b000_0011011: q = 4'b1000; - 11'b000_0011100: q = 4'b1000; - 11'b000_0011101: q = 4'b1000; - 11'b000_0011110: q = 4'b1000; - 11'b000_0011111: q = 4'b1000; - 11'b000_0100000: q = 4'b1000; - 11'b000_0100001: q = 4'b1000; - 11'b000_0100010: q = 4'b1000; - 11'b000_0100011: q = 4'b1000; - 11'b000_0100100: q = 4'b1000; - 11'b000_0100101: q = 4'b1000; - 11'b000_0100110: q = 4'b1000; - 
11'b000_0100111: q = 4'b1000; - 11'b000_0101000: q = 4'b1000; - 11'b000_0101001: q = 4'b1000; - 11'b000_0101010: q = 4'b1000; - 11'b000_0101011: q = 4'b1000; - 11'b000_0101100: q = 4'b1000; - 11'b000_0101101: q = 4'b1000; - 11'b000_0101110: q = 4'b1000; - 11'b000_0101111: q = 4'b1000; - 11'b000_0110000: q = 4'b1000; - 11'b000_0110001: q = 4'b1000; - 11'b000_0110010: q = 4'b1000; - 11'b000_0110011: q = 4'b1000; - 11'b000_0110100: q = 4'b1000; - 11'b000_0110101: q = 4'b1000; - 11'b000_0110110: q = 4'b1000; - 11'b000_0110111: q = 4'b1000; - 11'b000_0111000: q = 4'b1000; - 11'b000_0111001: q = 4'b1000; - 11'b000_0111010: q = 4'b1000; - 11'b000_0111011: q = 4'b1000; - 11'b000_0111100: q = 4'b1000; - 11'b000_0111101: q = 4'b1000; - 11'b000_0111110: q = 4'b1000; - 11'b000_0111111: q = 4'b1000; - 11'b000_1000000: q = 4'b0001; - 11'b000_1000001: q = 4'b0001; - 11'b000_1000010: q = 4'b0001; - 11'b000_1000011: q = 4'b0001; - 11'b000_1000100: q = 4'b0001; - 11'b000_1000101: q = 4'b0001; - 11'b000_1000110: q = 4'b0001; - 11'b000_1000111: q = 4'b0001; - 11'b000_1001000: q = 4'b0001; - 11'b000_1001001: q = 4'b0001; - 11'b000_1001010: q = 4'b0001; - 11'b000_1001011: q = 4'b0001; - 11'b000_1001100: q = 4'b0001; - 11'b000_1001101: q = 4'b0001; - 11'b000_1001110: q = 4'b0001; - 11'b000_1001111: q = 4'b0001; - 11'b000_1010000: q = 4'b0001; - 11'b000_1010001: q = 4'b0001; - 11'b000_1010010: q = 4'b0001; - 11'b000_1010011: q = 4'b0001; - 11'b000_1010100: q = 4'b0001; - 11'b000_1010101: q = 4'b0001; - 11'b000_1010110: q = 4'b0001; - 11'b000_1010111: q = 4'b0001; - 11'b000_1011000: q = 4'b0001; - 11'b000_1011001: q = 4'b0001; - 11'b000_1011010: q = 4'b0001; - 11'b000_1011011: q = 4'b0001; - 11'b000_1011100: q = 4'b0001; - 11'b000_1011101: q = 4'b0001; - 11'b000_1011110: q = 4'b0001; - 11'b000_1011111: q = 4'b0001; - 11'b000_1100000: q = 4'b0001; - 11'b000_1100001: q = 4'b0001; - 11'b000_1100010: q = 4'b0001; - 11'b000_1100011: q = 4'b0001; - 11'b000_1100100: q = 4'b0001; - 
11'b000_1100101: q = 4'b0001; - 11'b000_1100110: q = 4'b0010; - 11'b000_1100111: q = 4'b0010; - 11'b000_1101000: q = 4'b0010; - 11'b000_1101001: q = 4'b0010; - 11'b000_1101010: q = 4'b0010; - 11'b000_1101011: q = 4'b0010; - 11'b000_1101100: q = 4'b0010; - 11'b000_1101101: q = 4'b0010; - 11'b000_1101110: q = 4'b0010; - 11'b000_1101111: q = 4'b0010; - 11'b000_1110000: q = 4'b0010; - 11'b000_1110001: q = 4'b0010; - 11'b000_1110010: q = 4'b0010; - 11'b000_1110011: q = 4'b0010; - 11'b000_1110100: q = 4'b0010; - 11'b000_1110101: q = 4'b0010; - 11'b000_1110110: q = 4'b0010; - 11'b000_1110111: q = 4'b0010; - 11'b000_1111000: q = 4'b0000; - 11'b000_1111001: q = 4'b0000; - 11'b000_1111010: q = 4'b0000; - 11'b000_1111011: q = 4'b0000; - 11'b000_1111100: q = 4'b0000; - 11'b000_1111101: q = 4'b0000; - 11'b000_1111110: q = 4'b0000; - 11'b000_1111111: q = 4'b0000; - 11'b001_0000000: q = 4'b0000; - 11'b001_0000001: q = 4'b0000; - 11'b001_0000010: q = 4'b0000; - 11'b001_0000011: q = 4'b0000; - 11'b001_0000100: q = 4'b0000; - 11'b001_0000101: q = 4'b0000; - 11'b001_0000110: q = 4'b0000; - 11'b001_0000111: q = 4'b0000; - 11'b001_0001000: q = 4'b0100; - 11'b001_0001001: q = 4'b0100; - 11'b001_0001010: q = 4'b0100; - 11'b001_0001011: q = 4'b0100; - 11'b001_0001100: q = 4'b0100; - 11'b001_0001101: q = 4'b0100; - 11'b001_0001110: q = 4'b0100; - 11'b001_0001111: q = 4'b0100; - 11'b001_0010000: q = 4'b0100; - 11'b001_0010001: q = 4'b0100; - 11'b001_0010010: q = 4'b0100; - 11'b001_0010011: q = 4'b0100; - 11'b001_0010100: q = 4'b0100; - 11'b001_0010101: q = 4'b0100; - 11'b001_0010110: q = 4'b0100; - 11'b001_0010111: q = 4'b0100; - 11'b001_0011000: q = 4'b0100; - 11'b001_0011001: q = 4'b0100; - 11'b001_0011010: q = 4'b0100; - 11'b001_0011011: q = 4'b0100; - 11'b001_0011100: q = 4'b1000; - 11'b001_0011101: q = 4'b1000; - 11'b001_0011110: q = 4'b1000; - 11'b001_0011111: q = 4'b1000; - 11'b001_0100000: q = 4'b1000; - 11'b001_0100001: q = 4'b1000; - 11'b001_0100010: q = 4'b1000; - 
11'b001_0100011: q = 4'b1000; - 11'b001_0100100: q = 4'b1000; - 11'b001_0100101: q = 4'b1000; - 11'b001_0100110: q = 4'b1000; - 11'b001_0100111: q = 4'b1000; - 11'b001_0101000: q = 4'b1000; - 11'b001_0101001: q = 4'b1000; - 11'b001_0101010: q = 4'b1000; - 11'b001_0101011: q = 4'b1000; - 11'b001_0101100: q = 4'b1000; - 11'b001_0101101: q = 4'b1000; - 11'b001_0101110: q = 4'b1000; - 11'b001_0101111: q = 4'b1000; - 11'b001_0110000: q = 4'b1000; - 11'b001_0110001: q = 4'b1000; - 11'b001_0110010: q = 4'b1000; - 11'b001_0110011: q = 4'b1000; - 11'b001_0110100: q = 4'b1000; - 11'b001_0110101: q = 4'b1000; - 11'b001_0110110: q = 4'b1000; - 11'b001_0110111: q = 4'b1000; - 11'b001_0111000: q = 4'b1000; - 11'b001_0111001: q = 4'b1000; - 11'b001_0111010: q = 4'b1000; - 11'b001_0111011: q = 4'b1000; - 11'b001_0111100: q = 4'b1000; - 11'b001_0111101: q = 4'b1000; - 11'b001_0111110: q = 4'b1000; - 11'b001_0111111: q = 4'b1000; - 11'b001_1000000: q = 4'b0001; - 11'b001_1000001: q = 4'b0001; - 11'b001_1000010: q = 4'b0001; - 11'b001_1000011: q = 4'b0001; - 11'b001_1000100: q = 4'b0001; - 11'b001_1000101: q = 4'b0001; - 11'b001_1000110: q = 4'b0001; - 11'b001_1000111: q = 4'b0001; - 11'b001_1001000: q = 4'b0001; - 11'b001_1001001: q = 4'b0001; - 11'b001_1001010: q = 4'b0001; - 11'b001_1001011: q = 4'b0001; - 11'b001_1001100: q = 4'b0001; - 11'b001_1001101: q = 4'b0001; - 11'b001_1001110: q = 4'b0001; - 11'b001_1001111: q = 4'b0001; - 11'b001_1010000: q = 4'b0001; - 11'b001_1010001: q = 4'b0001; - 11'b001_1010010: q = 4'b0001; - 11'b001_1010011: q = 4'b0001; - 11'b001_1010100: q = 4'b0001; - 11'b001_1010101: q = 4'b0001; - 11'b001_1010110: q = 4'b0001; - 11'b001_1010111: q = 4'b0001; - 11'b001_1011000: q = 4'b0001; - 11'b001_1011001: q = 4'b0001; - 11'b001_1011010: q = 4'b0001; - 11'b001_1011011: q = 4'b0001; - 11'b001_1011100: q = 4'b0001; - 11'b001_1011101: q = 4'b0001; - 11'b001_1011110: q = 4'b0001; - 11'b001_1011111: q = 4'b0001; - 11'b001_1100000: q = 4'b0001; - 
11'b001_1100001: q = 4'b0001; - 11'b001_1100010: q = 4'b0001; - 11'b001_1100011: q = 4'b0001; - 11'b001_1100100: q = 4'b0010; - 11'b001_1100101: q = 4'b0010; - 11'b001_1100110: q = 4'b0010; - 11'b001_1100111: q = 4'b0010; - 11'b001_1101000: q = 4'b0010; - 11'b001_1101001: q = 4'b0010; - 11'b001_1101010: q = 4'b0010; - 11'b001_1101011: q = 4'b0010; - 11'b001_1101100: q = 4'b0010; - 11'b001_1101101: q = 4'b0010; - 11'b001_1101110: q = 4'b0010; - 11'b001_1101111: q = 4'b0010; - 11'b001_1110000: q = 4'b0010; - 11'b001_1110001: q = 4'b0010; - 11'b001_1110010: q = 4'b0010; - 11'b001_1110011: q = 4'b0010; - 11'b001_1110100: q = 4'b0010; - 11'b001_1110101: q = 4'b0010; - 11'b001_1110110: q = 4'b0000; - 11'b001_1110111: q = 4'b0000; - 11'b001_1111000: q = 4'b0000; - 11'b001_1111001: q = 4'b0000; - 11'b001_1111010: q = 4'b0000; - 11'b001_1111011: q = 4'b0000; - 11'b001_1111100: q = 4'b0000; - 11'b001_1111101: q = 4'b0000; - 11'b001_1111110: q = 4'b0000; - 11'b001_1111111: q = 4'b0000; - 11'b010_0000000: q = 4'b0000; - 11'b010_0000001: q = 4'b0000; - 11'b010_0000010: q = 4'b0000; - 11'b010_0000011: q = 4'b0000; - 11'b010_0000100: q = 4'b0000; - 11'b010_0000101: q = 4'b0000; - 11'b010_0000110: q = 4'b0000; - 11'b010_0000111: q = 4'b0000; - 11'b010_0001000: q = 4'b0100; - 11'b010_0001001: q = 4'b0100; - 11'b010_0001010: q = 4'b0100; - 11'b010_0001011: q = 4'b0100; - 11'b010_0001100: q = 4'b0100; - 11'b010_0001101: q = 4'b0100; - 11'b010_0001110: q = 4'b0100; - 11'b010_0001111: q = 4'b0100; - 11'b010_0010000: q = 4'b0100; - 11'b010_0010001: q = 4'b0100; - 11'b010_0010010: q = 4'b0100; - 11'b010_0010011: q = 4'b0100; - 11'b010_0010100: q = 4'b0100; - 11'b010_0010101: q = 4'b0100; - 11'b010_0010110: q = 4'b0100; - 11'b010_0010111: q = 4'b0100; - 11'b010_0011000: q = 4'b0100; - 11'b010_0011001: q = 4'b0100; - 11'b010_0011010: q = 4'b0100; - 11'b010_0011011: q = 4'b0100; - 11'b010_0011100: q = 4'b0100; - 11'b010_0011101: q = 4'b0100; - 11'b010_0011110: q = 4'b0100; - 
11'b010_0011111: q = 4'b0100; - 11'b010_0100000: q = 4'b1000; - 11'b010_0100001: q = 4'b1000; - 11'b010_0100010: q = 4'b1000; - 11'b010_0100011: q = 4'b1000; - 11'b010_0100100: q = 4'b1000; - 11'b010_0100101: q = 4'b1000; - 11'b010_0100110: q = 4'b1000; - 11'b010_0100111: q = 4'b1000; - 11'b010_0101000: q = 4'b1000; - 11'b010_0101001: q = 4'b1000; - 11'b010_0101010: q = 4'b1000; - 11'b010_0101011: q = 4'b1000; - 11'b010_0101100: q = 4'b1000; - 11'b010_0101101: q = 4'b1000; - 11'b010_0101110: q = 4'b1000; - 11'b010_0101111: q = 4'b1000; - 11'b010_0110000: q = 4'b1000; - 11'b010_0110001: q = 4'b1000; - 11'b010_0110010: q = 4'b1000; - 11'b010_0110011: q = 4'b1000; - 11'b010_0110100: q = 4'b1000; - 11'b010_0110101: q = 4'b1000; - 11'b010_0110110: q = 4'b1000; - 11'b010_0110111: q = 4'b1000; - 11'b010_0111000: q = 4'b1000; - 11'b010_0111001: q = 4'b1000; - 11'b010_0111010: q = 4'b1000; - 11'b010_0111011: q = 4'b1000; - 11'b010_0111100: q = 4'b1000; - 11'b010_0111101: q = 4'b1000; - 11'b010_0111110: q = 4'b1000; - 11'b010_0111111: q = 4'b1000; - 11'b010_1000000: q = 4'b0001; - 11'b010_1000001: q = 4'b0001; - 11'b010_1000010: q = 4'b0001; - 11'b010_1000011: q = 4'b0001; - 11'b010_1000100: q = 4'b0001; - 11'b010_1000101: q = 4'b0001; - 11'b010_1000110: q = 4'b0001; - 11'b010_1000111: q = 4'b0001; - 11'b010_1001000: q = 4'b0001; - 11'b010_1001001: q = 4'b0001; - 11'b010_1001010: q = 4'b0001; - 11'b010_1001011: q = 4'b0001; - 11'b010_1001100: q = 4'b0001; - 11'b010_1001101: q = 4'b0001; - 11'b010_1001110: q = 4'b0001; - 11'b010_1001111: q = 4'b0001; - 11'b010_1010000: q = 4'b0001; - 11'b010_1010001: q = 4'b0001; - 11'b010_1010010: q = 4'b0001; - 11'b010_1010011: q = 4'b0001; - 11'b010_1010100: q = 4'b0001; - 11'b010_1010101: q = 4'b0001; - 11'b010_1010110: q = 4'b0001; - 11'b010_1010111: q = 4'b0001; - 11'b010_1011000: q = 4'b0001; - 11'b010_1011001: q = 4'b0001; - 11'b010_1011010: q = 4'b0001; - 11'b010_1011011: q = 4'b0001; - 11'b010_1011100: q = 4'b0001; - 
11'b010_1011101: q = 4'b0001; - 11'b010_1011110: q = 4'b0001; - 11'b010_1011111: q = 4'b0001; - 11'b010_1100000: q = 4'b0010; - 11'b010_1100001: q = 4'b0010; - 11'b010_1100010: q = 4'b0010; - 11'b010_1100011: q = 4'b0010; - 11'b010_1100100: q = 4'b0010; - 11'b010_1100101: q = 4'b0010; - 11'b010_1100110: q = 4'b0010; - 11'b010_1100111: q = 4'b0010; - 11'b010_1101000: q = 4'b0010; - 11'b010_1101001: q = 4'b0010; - 11'b010_1101010: q = 4'b0010; - 11'b010_1101011: q = 4'b0010; - 11'b010_1101100: q = 4'b0010; - 11'b010_1101101: q = 4'b0010; - 11'b010_1101110: q = 4'b0010; - 11'b010_1101111: q = 4'b0010; - 11'b010_1110000: q = 4'b0010; - 11'b010_1110001: q = 4'b0010; - 11'b010_1110010: q = 4'b0010; - 11'b010_1110011: q = 4'b0010; - 11'b010_1110100: q = 4'b0000; - 11'b010_1110101: q = 4'b0000; - 11'b010_1110110: q = 4'b0000; - 11'b010_1110111: q = 4'b0000; - 11'b010_1111000: q = 4'b0000; - 11'b010_1111001: q = 4'b0000; - 11'b010_1111010: q = 4'b0000; - 11'b010_1111011: q = 4'b0000; - 11'b010_1111100: q = 4'b0000; - 11'b010_1111101: q = 4'b0000; - 11'b010_1111110: q = 4'b0000; - 11'b010_1111111: q = 4'b0000; - 11'b011_0000000: q = 4'b0000; - 11'b011_0000001: q = 4'b0000; - 11'b011_0000010: q = 4'b0000; - 11'b011_0000011: q = 4'b0000; - 11'b011_0000100: q = 4'b0000; - 11'b011_0000101: q = 4'b0000; - 11'b011_0000110: q = 4'b0000; - 11'b011_0000111: q = 4'b0000; - 11'b011_0001000: q = 4'b0100; - 11'b011_0001001: q = 4'b0100; - 11'b011_0001010: q = 4'b0100; - 11'b011_0001011: q = 4'b0100; - 11'b011_0001100: q = 4'b0100; - 11'b011_0001101: q = 4'b0100; - 11'b011_0001110: q = 4'b0100; - 11'b011_0001111: q = 4'b0100; - 11'b011_0010000: q = 4'b0100; - 11'b011_0010001: q = 4'b0100; - 11'b011_0010010: q = 4'b0100; - 11'b011_0010011: q = 4'b0100; - 11'b011_0010100: q = 4'b0100; - 11'b011_0010101: q = 4'b0100; - 11'b011_0010110: q = 4'b0100; - 11'b011_0010111: q = 4'b0100; - 11'b011_0011000: q = 4'b0100; - 11'b011_0011001: q = 4'b0100; - 11'b011_0011010: q = 4'b0100; - 
11'b011_0011011: q = 4'b0100; - 11'b011_0011100: q = 4'b0100; - 11'b011_0011101: q = 4'b0100; - 11'b011_0011110: q = 4'b0100; - 11'b011_0011111: q = 4'b0100; - 11'b011_0100000: q = 4'b1000; - 11'b011_0100001: q = 4'b1000; - 11'b011_0100010: q = 4'b1000; - 11'b011_0100011: q = 4'b1000; - 11'b011_0100100: q = 4'b1000; - 11'b011_0100101: q = 4'b1000; - 11'b011_0100110: q = 4'b1000; - 11'b011_0100111: q = 4'b1000; - 11'b011_0101000: q = 4'b1000; - 11'b011_0101001: q = 4'b1000; - 11'b011_0101010: q = 4'b1000; - 11'b011_0101011: q = 4'b1000; - 11'b011_0101100: q = 4'b1000; - 11'b011_0101101: q = 4'b1000; - 11'b011_0101110: q = 4'b1000; - 11'b011_0101111: q = 4'b1000; - 11'b011_0110000: q = 4'b1000; - 11'b011_0110001: q = 4'b1000; - 11'b011_0110010: q = 4'b1000; - 11'b011_0110011: q = 4'b1000; - 11'b011_0110100: q = 4'b1000; - 11'b011_0110101: q = 4'b1000; - 11'b011_0110110: q = 4'b1000; - 11'b011_0110111: q = 4'b1000; - 11'b011_0111000: q = 4'b1000; - 11'b011_0111001: q = 4'b1000; - 11'b011_0111010: q = 4'b1000; - 11'b011_0111011: q = 4'b1000; - 11'b011_0111100: q = 4'b1000; - 11'b011_0111101: q = 4'b1000; - 11'b011_0111110: q = 4'b1000; - 11'b011_0111111: q = 4'b1000; - 11'b011_1000000: q = 4'b0001; - 11'b011_1000001: q = 4'b0001; - 11'b011_1000010: q = 4'b0001; - 11'b011_1000011: q = 4'b0001; - 11'b011_1000100: q = 4'b0001; - 11'b011_1000101: q = 4'b0001; - 11'b011_1000110: q = 4'b0001; - 11'b011_1000111: q = 4'b0001; - 11'b011_1001000: q = 4'b0001; - 11'b011_1001001: q = 4'b0001; - 11'b011_1001010: q = 4'b0001; - 11'b011_1001011: q = 4'b0001; - 11'b011_1001100: q = 4'b0001; - 11'b011_1001101: q = 4'b0001; - 11'b011_1001110: q = 4'b0001; - 11'b011_1001111: q = 4'b0001; - 11'b011_1010000: q = 4'b0001; - 11'b011_1010001: q = 4'b0001; - 11'b011_1010010: q = 4'b0001; - 11'b011_1010011: q = 4'b0001; - 11'b011_1010100: q = 4'b0001; - 11'b011_1010101: q = 4'b0001; - 11'b011_1010110: q = 4'b0001; - 11'b011_1010111: q = 4'b0001; - 11'b011_1011000: q = 4'b0001; - 
11'b011_1011001: q = 4'b0001; - 11'b011_1011010: q = 4'b0001; - 11'b011_1011011: q = 4'b0001; - 11'b011_1011100: q = 4'b0001; - 11'b011_1011101: q = 4'b0001; - 11'b011_1011110: q = 4'b0010; - 11'b011_1011111: q = 4'b0010; - 11'b011_1100000: q = 4'b0010; - 11'b011_1100001: q = 4'b0010; - 11'b011_1100010: q = 4'b0010; - 11'b011_1100011: q = 4'b0010; - 11'b011_1100100: q = 4'b0010; - 11'b011_1100101: q = 4'b0010; - 11'b011_1100110: q = 4'b0010; - 11'b011_1100111: q = 4'b0010; - 11'b011_1101000: q = 4'b0010; - 11'b011_1101001: q = 4'b0010; - 11'b011_1101010: q = 4'b0010; - 11'b011_1101011: q = 4'b0010; - 11'b011_1101100: q = 4'b0010; - 11'b011_1101101: q = 4'b0010; - 11'b011_1101110: q = 4'b0010; - 11'b011_1101111: q = 4'b0010; - 11'b011_1110000: q = 4'b0010; - 11'b011_1110001: q = 4'b0010; - 11'b011_1110010: q = 4'b0010; - 11'b011_1110011: q = 4'b0010; - 11'b011_1110100: q = 4'b0000; - 11'b011_1110101: q = 4'b0000; - 11'b011_1110110: q = 4'b0000; - 11'b011_1110111: q = 4'b0000; - 11'b011_1111000: q = 4'b0000; - 11'b011_1111001: q = 4'b0000; - 11'b011_1111010: q = 4'b0000; - 11'b011_1111011: q = 4'b0000; - 11'b011_1111100: q = 4'b0000; - 11'b011_1111101: q = 4'b0000; - 11'b011_1111110: q = 4'b0000; - 11'b011_1111111: q = 4'b0000; - 11'b100_0000000: q = 4'b0000; - 11'b100_0000001: q = 4'b0000; - 11'b100_0000010: q = 4'b0000; - 11'b100_0000011: q = 4'b0000; - 11'b100_0000100: q = 4'b0000; - 11'b100_0000101: q = 4'b0000; - 11'b100_0000110: q = 4'b0000; - 11'b100_0000111: q = 4'b0000; - 11'b100_0001000: q = 4'b0000; - 11'b100_0001001: q = 4'b0000; - 11'b100_0001010: q = 4'b0000; - 11'b100_0001011: q = 4'b0000; - 11'b100_0001100: q = 4'b0100; - 11'b100_0001101: q = 4'b0100; - 11'b100_0001110: q = 4'b0100; - 11'b100_0001111: q = 4'b0100; - 11'b100_0010000: q = 4'b0100; - 11'b100_0010001: q = 4'b0100; - 11'b100_0010010: q = 4'b0100; - 11'b100_0010011: q = 4'b0100; - 11'b100_0010100: q = 4'b0100; - 11'b100_0010101: q = 4'b0100; - 11'b100_0010110: q = 4'b0100; - 
11'b100_0010111: q = 4'b0100; - 11'b100_0011000: q = 4'b0100; - 11'b100_0011001: q = 4'b0100; - 11'b100_0011010: q = 4'b0100; - 11'b100_0011011: q = 4'b0100; - 11'b100_0011100: q = 4'b0100; - 11'b100_0011101: q = 4'b0100; - 11'b100_0011110: q = 4'b0100; - 11'b100_0011111: q = 4'b0100; - 11'b100_0100000: q = 4'b0100; - 11'b100_0100001: q = 4'b0100; - 11'b100_0100010: q = 4'b0100; - 11'b100_0100011: q = 4'b0100; - 11'b100_0100100: q = 4'b1000; - 11'b100_0100101: q = 4'b1000; - 11'b100_0100110: q = 4'b1000; - 11'b100_0100111: q = 4'b1000; - 11'b100_0101000: q = 4'b1000; - 11'b100_0101001: q = 4'b1000; - 11'b100_0101010: q = 4'b1000; - 11'b100_0101011: q = 4'b1000; - 11'b100_0101100: q = 4'b1000; - 11'b100_0101101: q = 4'b1000; - 11'b100_0101110: q = 4'b1000; - 11'b100_0101111: q = 4'b1000; - 11'b100_0110000: q = 4'b1000; - 11'b100_0110001: q = 4'b1000; - 11'b100_0110010: q = 4'b1000; - 11'b100_0110011: q = 4'b1000; - 11'b100_0110100: q = 4'b1000; - 11'b100_0110101: q = 4'b1000; - 11'b100_0110110: q = 4'b1000; - 11'b100_0110111: q = 4'b1000; - 11'b100_0111000: q = 4'b1000; - 11'b100_0111001: q = 4'b1000; - 11'b100_0111010: q = 4'b1000; - 11'b100_0111011: q = 4'b1000; - 11'b100_0111100: q = 4'b1000; - 11'b100_0111101: q = 4'b1000; - 11'b100_0111110: q = 4'b1000; - 11'b100_0111111: q = 4'b1000; - 11'b100_1000000: q = 4'b0001; - 11'b100_1000001: q = 4'b0001; - 11'b100_1000010: q = 4'b0001; - 11'b100_1000011: q = 4'b0001; - 11'b100_1000100: q = 4'b0001; - 11'b100_1000101: q = 4'b0001; - 11'b100_1000110: q = 4'b0001; - 11'b100_1000111: q = 4'b0001; - 11'b100_1001000: q = 4'b0001; - 11'b100_1001001: q = 4'b0001; - 11'b100_1001010: q = 4'b0001; - 11'b100_1001011: q = 4'b0001; - 11'b100_1001100: q = 4'b0001; - 11'b100_1001101: q = 4'b0001; - 11'b100_1001110: q = 4'b0001; - 11'b100_1001111: q = 4'b0001; - 11'b100_1010000: q = 4'b0001; - 11'b100_1010001: q = 4'b0001; - 11'b100_1010010: q = 4'b0001; - 11'b100_1010011: q = 4'b0001; - 11'b100_1010100: q = 4'b0001; - 
11'b100_1010101: q = 4'b0001; - 11'b100_1010110: q = 4'b0001; - 11'b100_1010111: q = 4'b0001; - 11'b100_1011000: q = 4'b0001; - 11'b100_1011001: q = 4'b0001; - 11'b100_1011010: q = 4'b0001; - 11'b100_1011011: q = 4'b0001; - 11'b100_1011100: q = 4'b0010; - 11'b100_1011101: q = 4'b0010; - 11'b100_1011110: q = 4'b0010; - 11'b100_1011111: q = 4'b0010; - 11'b100_1100000: q = 4'b0010; - 11'b100_1100001: q = 4'b0010; - 11'b100_1100010: q = 4'b0010; - 11'b100_1100011: q = 4'b0010; - 11'b100_1100100: q = 4'b0010; - 11'b100_1100101: q = 4'b0010; - 11'b100_1100110: q = 4'b0010; - 11'b100_1100111: q = 4'b0010; - 11'b100_1101000: q = 4'b0010; - 11'b100_1101001: q = 4'b0010; - 11'b100_1101010: q = 4'b0010; - 11'b100_1101011: q = 4'b0010; - 11'b100_1101100: q = 4'b0010; - 11'b100_1101101: q = 4'b0010; - 11'b100_1101110: q = 4'b0010; - 11'b100_1101111: q = 4'b0010; - 11'b100_1110000: q = 4'b0010; - 11'b100_1110001: q = 4'b0010; - 11'b100_1110010: q = 4'b0010; - 11'b100_1110011: q = 4'b0010; - 11'b100_1110100: q = 4'b0000; - 11'b100_1110101: q = 4'b0000; - 11'b100_1110110: q = 4'b0000; - 11'b100_1110111: q = 4'b0000; - 11'b100_1111000: q = 4'b0000; - 11'b100_1111001: q = 4'b0000; - 11'b100_1111010: q = 4'b0000; - 11'b100_1111011: q = 4'b0000; - 11'b100_1111100: q = 4'b0000; - 11'b100_1111101: q = 4'b0000; - 11'b100_1111110: q = 4'b0000; - 11'b100_1111111: q = 4'b0000; - 11'b101_0000000: q = 4'b0000; - 11'b101_0000001: q = 4'b0000; - 11'b101_0000010: q = 4'b0000; - 11'b101_0000011: q = 4'b0000; - 11'b101_0000100: q = 4'b0000; - 11'b101_0000101: q = 4'b0000; - 11'b101_0000110: q = 4'b0000; - 11'b101_0000111: q = 4'b0000; - 11'b101_0001000: q = 4'b0000; - 11'b101_0001001: q = 4'b0000; - 11'b101_0001010: q = 4'b0000; - 11'b101_0001011: q = 4'b0000; - 11'b101_0001100: q = 4'b0100; - 11'b101_0001101: q = 4'b0100; - 11'b101_0001110: q = 4'b0100; - 11'b101_0001111: q = 4'b0100; - 11'b101_0010000: q = 4'b0100; - 11'b101_0010001: q = 4'b0100; - 11'b101_0010010: q = 4'b0100; - 
11'b101_0010011: q = 4'b0100; - 11'b101_0010100: q = 4'b0100; - 11'b101_0010101: q = 4'b0100; - 11'b101_0010110: q = 4'b0100; - 11'b101_0010111: q = 4'b0100; - 11'b101_0011000: q = 4'b0100; - 11'b101_0011001: q = 4'b0100; - 11'b101_0011010: q = 4'b0100; - 11'b101_0011011: q = 4'b0100; - 11'b101_0011100: q = 4'b0100; - 11'b101_0011101: q = 4'b0100; - 11'b101_0011110: q = 4'b0100; - 11'b101_0011111: q = 4'b0100; - 11'b101_0100000: q = 4'b0100; - 11'b101_0100001: q = 4'b0100; - 11'b101_0100010: q = 4'b0100; - 11'b101_0100011: q = 4'b0100; - 11'b101_0100100: q = 4'b0100; - 11'b101_0100101: q = 4'b0100; - 11'b101_0100110: q = 4'b0100; - 11'b101_0100111: q = 4'b0100; - 11'b101_0101000: q = 4'b1000; - 11'b101_0101001: q = 4'b1000; - 11'b101_0101010: q = 4'b1000; - 11'b101_0101011: q = 4'b1000; - 11'b101_0101100: q = 4'b1000; - 11'b101_0101101: q = 4'b1000; - 11'b101_0101110: q = 4'b1000; - 11'b101_0101111: q = 4'b1000; - 11'b101_0110000: q = 4'b1000; - 11'b101_0110001: q = 4'b1000; - 11'b101_0110010: q = 4'b1000; - 11'b101_0110011: q = 4'b1000; - 11'b101_0110100: q = 4'b1000; - 11'b101_0110101: q = 4'b1000; - 11'b101_0110110: q = 4'b1000; - 11'b101_0110111: q = 4'b1000; - 11'b101_0111000: q = 4'b1000; - 11'b101_0111001: q = 4'b1000; - 11'b101_0111010: q = 4'b1000; - 11'b101_0111011: q = 4'b1000; - 11'b101_0111100: q = 4'b1000; - 11'b101_0111101: q = 4'b1000; - 11'b101_0111110: q = 4'b1000; - 11'b101_0111111: q = 4'b1000; - 11'b101_1000000: q = 4'b0001; - 11'b101_1000001: q = 4'b0001; - 11'b101_1000010: q = 4'b0001; - 11'b101_1000011: q = 4'b0001; - 11'b101_1000100: q = 4'b0001; - 11'b101_1000101: q = 4'b0001; - 11'b101_1000110: q = 4'b0001; - 11'b101_1000111: q = 4'b0001; - 11'b101_1001000: q = 4'b0001; - 11'b101_1001001: q = 4'b0001; - 11'b101_1001010: q = 4'b0001; - 11'b101_1001011: q = 4'b0001; - 11'b101_1001100: q = 4'b0001; - 11'b101_1001101: q = 4'b0001; - 11'b101_1001110: q = 4'b0001; - 11'b101_1001111: q = 4'b0001; - 11'b101_1010000: q = 4'b0001; - 
11'b101_1010001: q = 4'b0001; - 11'b101_1010010: q = 4'b0001; - 11'b101_1010011: q = 4'b0001; - 11'b101_1010100: q = 4'b0001; - 11'b101_1010101: q = 4'b0001; - 11'b101_1010110: q = 4'b0001; - 11'b101_1010111: q = 4'b0001; - 11'b101_1011000: q = 4'b0010; - 11'b101_1011001: q = 4'b0010; - 11'b101_1011010: q = 4'b0010; - 11'b101_1011011: q = 4'b0010; - 11'b101_1011100: q = 4'b0010; - 11'b101_1011101: q = 4'b0010; - 11'b101_1011110: q = 4'b0010; - 11'b101_1011111: q = 4'b0010; - 11'b101_1100000: q = 4'b0010; - 11'b101_1100001: q = 4'b0010; - 11'b101_1100010: q = 4'b0010; - 11'b101_1100011: q = 4'b0010; - 11'b101_1100100: q = 4'b0010; - 11'b101_1100101: q = 4'b0010; - 11'b101_1100110: q = 4'b0010; - 11'b101_1100111: q = 4'b0010; - 11'b101_1101000: q = 4'b0010; - 11'b101_1101001: q = 4'b0010; - 11'b101_1101010: q = 4'b0010; - 11'b101_1101011: q = 4'b0010; - 11'b101_1101100: q = 4'b0010; - 11'b101_1101101: q = 4'b0010; - 11'b101_1101110: q = 4'b0010; - 11'b101_1101111: q = 4'b0010; - 11'b101_1110000: q = 4'b0000; - 11'b101_1110001: q = 4'b0000; - 11'b101_1110010: q = 4'b0000; - 11'b101_1110011: q = 4'b0000; - 11'b101_1110100: q = 4'b0000; - 11'b101_1110101: q = 4'b0000; - 11'b101_1110110: q = 4'b0000; - 11'b101_1110111: q = 4'b0000; - 11'b101_1111000: q = 4'b0000; - 11'b101_1111001: q = 4'b0000; - 11'b101_1111010: q = 4'b0000; - 11'b101_1111011: q = 4'b0000; - 11'b101_1111100: q = 4'b0000; - 11'b101_1111101: q = 4'b0000; - 11'b101_1111110: q = 4'b0000; - 11'b101_1111111: q = 4'b0000; - 11'b110_0000000: q = 4'b0000; - 11'b110_0000001: q = 4'b0000; - 11'b110_0000010: q = 4'b0000; - 11'b110_0000011: q = 4'b0000; - 11'b110_0000100: q = 4'b0000; - 11'b110_0000101: q = 4'b0000; - 11'b110_0000110: q = 4'b0000; - 11'b110_0000111: q = 4'b0000; - 11'b110_0001000: q = 4'b0000; - 11'b110_0001001: q = 4'b0000; - 11'b110_0001010: q = 4'b0000; - 11'b110_0001011: q = 4'b0000; - 11'b110_0001100: q = 4'b0000; - 11'b110_0001101: q = 4'b0000; - 11'b110_0001110: q = 4'b0000; - 
11'b110_0001111: q = 4'b0000; - 11'b110_0010000: q = 4'b0100; - 11'b110_0010001: q = 4'b0100; - 11'b110_0010010: q = 4'b0100; - 11'b110_0010011: q = 4'b0100; - 11'b110_0010100: q = 4'b0100; - 11'b110_0010101: q = 4'b0100; - 11'b110_0010110: q = 4'b0100; - 11'b110_0010111: q = 4'b0100; - 11'b110_0011000: q = 4'b0100; - 11'b110_0011001: q = 4'b0100; - 11'b110_0011010: q = 4'b0100; - 11'b110_0011011: q = 4'b0100; - 11'b110_0011100: q = 4'b0100; - 11'b110_0011101: q = 4'b0100; - 11'b110_0011110: q = 4'b0100; - 11'b110_0011111: q = 4'b0100; - 11'b110_0100000: q = 4'b0100; - 11'b110_0100001: q = 4'b0100; - 11'b110_0100010: q = 4'b0100; - 11'b110_0100011: q = 4'b0100; - 11'b110_0100100: q = 4'b0100; - 11'b110_0100101: q = 4'b0100; - 11'b110_0100110: q = 4'b0100; - 11'b110_0100111: q = 4'b0100; - 11'b110_0101000: q = 4'b1000; - 11'b110_0101001: q = 4'b1000; - 11'b110_0101010: q = 4'b1000; - 11'b110_0101011: q = 4'b1000; - 11'b110_0101100: q = 4'b1000; - 11'b110_0101101: q = 4'b1000; - 11'b110_0101110: q = 4'b1000; - 11'b110_0101111: q = 4'b1000; - 11'b110_0110000: q = 4'b1000; - 11'b110_0110001: q = 4'b1000; - 11'b110_0110010: q = 4'b1000; - 11'b110_0110011: q = 4'b1000; - 11'b110_0110100: q = 4'b1000; - 11'b110_0110101: q = 4'b1000; - 11'b110_0110110: q = 4'b1000; - 11'b110_0110111: q = 4'b1000; - 11'b110_0111000: q = 4'b1000; - 11'b110_0111001: q = 4'b1000; - 11'b110_0111010: q = 4'b1000; - 11'b110_0111011: q = 4'b1000; - 11'b110_0111100: q = 4'b1000; - 11'b110_0111101: q = 4'b1000; - 11'b110_0111110: q = 4'b1000; - 11'b110_0111111: q = 4'b1000; - 11'b110_1000000: q = 4'b0001; - 11'b110_1000001: q = 4'b0001; - 11'b110_1000010: q = 4'b0001; - 11'b110_1000011: q = 4'b0001; - 11'b110_1000100: q = 4'b0001; - 11'b110_1000101: q = 4'b0001; - 11'b110_1000110: q = 4'b0001; - 11'b110_1000111: q = 4'b0001; - 11'b110_1001000: q = 4'b0001; - 11'b110_1001001: q = 4'b0001; - 11'b110_1001010: q = 4'b0001; - 11'b110_1001011: q = 4'b0001; - 11'b110_1001100: q = 4'b0001; - 
11'b110_1001101: q = 4'b0001; - 11'b110_1001110: q = 4'b0001; - 11'b110_1001111: q = 4'b0001; - 11'b110_1010000: q = 4'b0001; - 11'b110_1010001: q = 4'b0001; - 11'b110_1010010: q = 4'b0001; - 11'b110_1010011: q = 4'b0001; - 11'b110_1010100: q = 4'b0010; - 11'b110_1010101: q = 4'b0010; - 11'b110_1010110: q = 4'b0010; - 11'b110_1010111: q = 4'b0010; - 11'b110_1011000: q = 4'b0010; - 11'b110_1011001: q = 4'b0010; - 11'b110_1011010: q = 4'b0010; - 11'b110_1011011: q = 4'b0010; - 11'b110_1011100: q = 4'b0010; - 11'b110_1011101: q = 4'b0010; - 11'b110_1011110: q = 4'b0010; - 11'b110_1011111: q = 4'b0010; - 11'b110_1100000: q = 4'b0010; - 11'b110_1100001: q = 4'b0010; - 11'b110_1100010: q = 4'b0010; - 11'b110_1100011: q = 4'b0010; - 11'b110_1100100: q = 4'b0010; - 11'b110_1100101: q = 4'b0010; - 11'b110_1100110: q = 4'b0010; - 11'b110_1100111: q = 4'b0010; - 11'b110_1101000: q = 4'b0010; - 11'b110_1101001: q = 4'b0010; - 11'b110_1101010: q = 4'b0010; - 11'b110_1101011: q = 4'b0010; - 11'b110_1101100: q = 4'b0010; - 11'b110_1101101: q = 4'b0010; - 11'b110_1101110: q = 4'b0010; - 11'b110_1101111: q = 4'b0010; - 11'b110_1110000: q = 4'b0000; - 11'b110_1110001: q = 4'b0000; - 11'b110_1110010: q = 4'b0000; - 11'b110_1110011: q = 4'b0000; - 11'b110_1110100: q = 4'b0000; - 11'b110_1110101: q = 4'b0000; - 11'b110_1110110: q = 4'b0000; - 11'b110_1110111: q = 4'b0000; - 11'b110_1111000: q = 4'b0000; - 11'b110_1111001: q = 4'b0000; - 11'b110_1111010: q = 4'b0000; - 11'b110_1111011: q = 4'b0000; - 11'b110_1111100: q = 4'b0000; - 11'b110_1111101: q = 4'b0000; - 11'b110_1111110: q = 4'b0000; - 11'b110_1111111: q = 4'b0000; - 11'b111_0000000: q = 4'b0000; - 11'b111_0000001: q = 4'b0000; - 11'b111_0000010: q = 4'b0000; - 11'b111_0000011: q = 4'b0000; - 11'b111_0000100: q = 4'b0000; - 11'b111_0000101: q = 4'b0000; - 11'b111_0000110: q = 4'b0000; - 11'b111_0000111: q = 4'b0000; - 11'b111_0001000: q = 4'b0000; - 11'b111_0001001: q = 4'b0000; - 11'b111_0001010: q = 4'b0000; - 
11'b111_0001011: q = 4'b0000; - 11'b111_0001100: q = 4'b0000; - 11'b111_0001101: q = 4'b0000; - 11'b111_0001110: q = 4'b0000; - 11'b111_0001111: q = 4'b0000; - 11'b111_0010000: q = 4'b0100; - 11'b111_0010001: q = 4'b0100; - 11'b111_0010010: q = 4'b0100; - 11'b111_0010011: q = 4'b0100; - 11'b111_0010100: q = 4'b0100; - 11'b111_0010101: q = 4'b0100; - 11'b111_0010110: q = 4'b0100; - 11'b111_0010111: q = 4'b0100; - 11'b111_0011000: q = 4'b0100; - 11'b111_0011001: q = 4'b0100; - 11'b111_0011010: q = 4'b0100; - 11'b111_0011011: q = 4'b0100; - 11'b111_0011100: q = 4'b0100; - 11'b111_0011101: q = 4'b0100; - 11'b111_0011110: q = 4'b0100; - 11'b111_0011111: q = 4'b0100; - 11'b111_0100000: q = 4'b0100; - 11'b111_0100001: q = 4'b0100; - 11'b111_0100010: q = 4'b0100; - 11'b111_0100011: q = 4'b0100; - 11'b111_0100100: q = 4'b0100; - 11'b111_0100101: q = 4'b0100; - 11'b111_0100110: q = 4'b0100; - 11'b111_0100111: q = 4'b0100; - 11'b111_0101000: q = 4'b0100; - 11'b111_0101001: q = 4'b0100; - 11'b111_0101010: q = 4'b0100; - 11'b111_0101011: q = 4'b0100; - 11'b111_0101100: q = 4'b1000; - 11'b111_0101101: q = 4'b1000; - 11'b111_0101110: q = 4'b1000; - 11'b111_0101111: q = 4'b1000; - 11'b111_0110000: q = 4'b1000; - 11'b111_0110001: q = 4'b1000; - 11'b111_0110010: q = 4'b1000; - 11'b111_0110011: q = 4'b1000; - 11'b111_0110100: q = 4'b1000; - 11'b111_0110101: q = 4'b1000; - 11'b111_0110110: q = 4'b1000; - 11'b111_0110111: q = 4'b1000; - 11'b111_0111000: q = 4'b1000; - 11'b111_0111001: q = 4'b1000; - 11'b111_0111010: q = 4'b1000; - 11'b111_0111011: q = 4'b1000; - 11'b111_0111100: q = 4'b1000; - 11'b111_0111101: q = 4'b1000; - 11'b111_0111110: q = 4'b1000; - 11'b111_0111111: q = 4'b1000; - 11'b111_1000000: q = 4'b0001; - 11'b111_1000001: q = 4'b0001; - 11'b111_1000010: q = 4'b0001; - 11'b111_1000011: q = 4'b0001; - 11'b111_1000100: q = 4'b0001; - 11'b111_1000101: q = 4'b0001; - 11'b111_1000110: q = 4'b0001; - 11'b111_1000111: q = 4'b0001; - 11'b111_1001000: q = 4'b0001; - 
11'b111_1001001: q = 4'b0001; - 11'b111_1001010: q = 4'b0001; - 11'b111_1001011: q = 4'b0001; - 11'b111_1001100: q = 4'b0001; - 11'b111_1001101: q = 4'b0001; - 11'b111_1001110: q = 4'b0001; - 11'b111_1001111: q = 4'b0001; - 11'b111_1010000: q = 4'b0001; - 11'b111_1010001: q = 4'b0001; - 11'b111_1010010: q = 4'b0010; - 11'b111_1010011: q = 4'b0010; - 11'b111_1010100: q = 4'b0010; - 11'b111_1010101: q = 4'b0010; - 11'b111_1010110: q = 4'b0010; - 11'b111_1010111: q = 4'b0010; - 11'b111_1011000: q = 4'b0010; - 11'b111_1011001: q = 4'b0010; - 11'b111_1011010: q = 4'b0010; - 11'b111_1011011: q = 4'b0010; - 11'b111_1011100: q = 4'b0010; - 11'b111_1011101: q = 4'b0010; - 11'b111_1011110: q = 4'b0010; - 11'b111_1011111: q = 4'b0010; - 11'b111_1100000: q = 4'b0010; - 11'b111_1100001: q = 4'b0010; - 11'b111_1100010: q = 4'b0010; - 11'b111_1100011: q = 4'b0010; - 11'b111_1100100: q = 4'b0010; - 11'b111_1100101: q = 4'b0010; - 11'b111_1100110: q = 4'b0010; - 11'b111_1100111: q = 4'b0010; - 11'b111_1101000: q = 4'b0010; - 11'b111_1101001: q = 4'b0010; - 11'b111_1101010: q = 4'b0010; - 11'b111_1101011: q = 4'b0010; - 11'b111_1101100: q = 4'b0010; - 11'b111_1101101: q = 4'b0010; - 11'b111_1101110: q = 4'b0010; - 11'b111_1101111: q = 4'b0010; - 11'b111_1110000: q = 4'b0000; - 11'b111_1110001: q = 4'b0000; - 11'b111_1110010: q = 4'b0000; - 11'b111_1110011: q = 4'b0000; - 11'b111_1110100: q = 4'b0000; - 11'b111_1110101: q = 4'b0000; - 11'b111_1110110: q = 4'b0000; - 11'b111_1110111: q = 4'b0000; - 11'b111_1111000: q = 4'b0000; - 11'b111_1111001: q = 4'b0000; - 11'b111_1111010: q = 4'b0000; - 11'b111_1111011: q = 4'b0000; - 11'b111_1111100: q = 4'b0000; - 11'b111_1111101: q = 4'b0000; - 11'b111_1111110: q = 4'b0000; - 11'b111_1111111: q = 4'b0000; - endcase diff --git a/pipelined/srt/sim-srt4 b/pipelined/srt/sim-srt4 deleted file mode 100755 index 1293b7261..000000000 --- a/pipelined/srt/sim-srt4 +++ /dev/null @@ -1,2 +0,0 @@ -vsim -do "do srt-radix4.do" - diff --git 
a/pipelined/srt/sim-srt4-batch b/pipelined/srt/sim-srt4-batch deleted file mode 100755 index 56cbcecb9..000000000 --- a/pipelined/srt/sim-srt4-batch +++ /dev/null @@ -1 +0,0 @@ -vsim -c -do "do srt-radix4.do" diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index d4b680626..066151653 100755 Binary files a/pipelined/srt/sqrttestgen and b/pipelined/srt/sqrttestgen differ diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 7b8cacd3f..76c6a6649 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -19,7 +19,7 @@ /* Prototypes */ -void output(FILE *fptr, double a, double r); +void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac); void printhex(FILE *fptr, double x); double random_input(void); @@ -28,12 +28,16 @@ double random_input(void); void main(void) { FILE *fptr; - double a, b, r; - double list[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + double aFrac, rFrac; + int aExp, rExp; + double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, - 1.1, 1.2, 1.01, 1.001, 1.0001, - 1/1.1, 1/1.5, 1/1.25, 1/1.125}; - int i, j; + 1.1, 1.5, 1.01, 1.001, 1.0001, + 2/1.1, 2/1.5, 2/1.25, 2/1.125}; + double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10, + 11, 12, 13, 14, 15, 16}; + int i; + int bias = 1023; if ((fptr = fopen("sqrttestvectors","w")) == NULL) { fprintf(stderr, "Couldn't write sqrttestvectors file\n"); @@ -41,31 +45,52 @@ void main(void) } for (i=0; i" prompt: -# do wally-pipelined.do -# or, to run from a shell, type the following at the shell prompt: -# vsim -do wally-pipelined.do -c -# (omit the "-c" to see the GUI while running from the shell) - -onbreak {resume} - -# create library -if [file exists work] { - vdel -all -} -vlib work - -vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv -vopt +acc work.testbenchradix4 -o workopt -vsim workopt - --- 
display input and output signals as hexidecimal values -add wave /testbenchradix4/* -add wave /testbenchradix4/srtradix4/* -add wave /testbenchradix4/srtradix4/qsel4/* -add wave /testbenchradix4/srtradix4/otfc4/* - --- Run the Simulation -run -all diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv deleted file mode 100644 index 39432c9e3..000000000 --- a/pipelined/srt/srt-radix4.sv +++ /dev/null @@ -1,383 +0,0 @@ -/////////////////////////////////////////// -// srt.sv -// -// Written: David_Harris@hmc.edu 13 January 2022 -// Modified: -// -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module srtradix4 ( - input logic clk, - input logic DivStart, - input logic [`NE-1:0] XExpE, YExpE, - input logic [`NF:0] XManE, YManE, - input logic [`XLEN-1:0] SrcA, SrcB, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic W64, // 32-bit ints on XLEN=64 - input logic Signed, // Interpret integers as signed 2's complement - input logic Int, // Choose integer inputs - input logic Sqrt, // perform square root, not divide - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, - output logic DivDone, - output logic DivStickyE, - output logic DivNegStickyE, - output logic [`DIVLEN+2:0] Quot, - output logic [`XLEN-1:0] Rem, // *** later handle integers - output logic [`NE+1:0] DivCalcExpE -); - - logic [3:0] q; - logic [`NE+1:0] DivCalcExp; - logic [`DIVLEN-1:0] X; - logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] WS, WSA, WSN; - logic [`DIVLEN+3:0] WC, WCA, WCN; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; - logic [$clog2(`XLEN+1)-1:0] intExp; - logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic intSign; - - srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X, - .XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign); - - // Top Muxes and Registers - // When start is asserted, the inputs are loaded into the divider. - // Otherwise, the divisor is retained and the partial remainder - // is fed back for the next iteration. 
- // - when the start signal is asserted X and 0 are loaded into WS and WC - // - otherwise load WSA into the flipflop - // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) - // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // *** change this for radix 4 - generate w/ stine code - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - - // Store the expoenent and sign until division is DivDone - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE); - - // Divisor Selection logic - // *** radix 4 change to choose -2 to 2 - // - choose the negitive version of what's being selected - assign DBar = ~D; - assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; - assign D2 = {D[`DIVLEN+2:0], 1'b0}; - - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {(`DIVLEN+4){1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase - - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - //*** change for radix 4 - otfc4 otfc4(.clk, .DivStart, .q, .Quot); - - expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); - - earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E, - .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone); - -endmodule 
- -//////////////// -// Submodules // -//////////////// - -module earlytermination( - input logic clk, - input logic [`DIVLEN+3:0] WS, WC, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic DivStart, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, - output logic DivStickyE, - output logic DivNegStickyE, - output logic DivDone); - - logic [$clog2(`DIVLEN/2+3)-1:0] Count; - logic WZero; - logic [`DIVLEN+3:0] W; - - assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE; - assign DivDone = (DivStickyE | WZero); - assign DivStickyE = ~|Count; - assign W = WC+WS; - assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? - assign EarlyTermShiftDiv2E = Count; - // +1 for setup - // `DIVLEN/2 to get required number of bits - // +1 for possible .5 and round bit - // Count down Counter - always @(posedge clk) - begin - if (DivStart) Count <= #1 `DIVLEN/2+2; - else Count <= #1 Count-1; - end -endmodule - -module qsel4 ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] WS, WC, - output logic [3:0] q -); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] Dmsbs; - assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; - assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; - // D = 0001.xxx... - // Dmsbs = | | - // W = xxxx.xxx... 
- // Wmsbs = | | - - logic [3:0] QSel4[1023:0]; - - initial begin - integer d, w, i, w2; - for(d=0; d<8; d++) - for(w=0; w<128; w++)begin - i = d*128+w; - w2 = w-128*(w>=64); // convert to two's complement - case(d) - 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-4) QSel4[i] = 4'b0000; - else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 1: if(w2>=14) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 2: if(w2>=15) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 3: if(w2>=16) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 4: if(w2>=18) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 5: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 6: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - endcase - end - end - assign q = QSel4[{Dmsbs,Wmsbs}]; - -endmodule - -/////////////////// -// Preprocessing // -/////////////////// -module srtpreproc ( - input logic [`XLEN-1:0] SrcA, SrcB, - input logic [`NF:0] XManE, YManE, - input logic W64, // 32-bit ints on XLEN=64 - input logic Signed, // Interpret integers as signed 2's complement - 
input logic Int, // Choose integer inputs - input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN-1:0] X, - output logic [`DIVLEN-1:0] Dpreproc, - output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent - output logic intSign // Quotient integer sign -); - // logic [`XLEN-1:0] PosA, PosB; - // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN-1:0] PreprocA, PreprocX; - logic [`DIVLEN-1:0] PreprocB, PreprocY; - - // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; - // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; - // lzc #(`XLEN) lzcA (PosA, zeroCntA); - // lzc #(`XLEN) lzcB (PosB, zeroCntB); - - // ***can probably merge X LZC with conversion - // cout the number of leading zeros - lzc #(`NF+1) lzcA (XManE, XZeroCnt); - lzc #(`NF+1) lzcB (YManE, YZeroCnt); - - // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; - // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; - - // assign PreprocA = ExtraA << zeroCntA; - // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XManE[`NF-1:0]< 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp - begin - errors = errors+1; - $display("result was %h_%h, should be %h %h %h\n", DivExp, r, correctr, diffn, diffp); - $display("failed\n"); - $stop; - end - if (afrac === 52'hxxxxxxxxxxxxx) - begin - $display("%d Tests completed successfully", testnum); - $stop; - end - end - if (req) - begin - req <= 0; - correctr = nextr; - testnum = testnum+1; - Vec = Tests[testnum]; - $display("a = %h b = %h",a,b); - a = Vec[`mema]; - {asign, aExp, afrac} = a; - b = Vec[`memb]; - {bsign, bExp, bfrac} = b; - nextr = Vec[`memr]; - end - end - -endmodule - diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 9655d7f70..39696af44 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -1,28 +1,28 @@ 
-`define DIVLEN 64 +`include "wally-config.vh" ///////////// // counter // ///////////// -module counter(input logic clk, - input logic req, - output logic done); +// module counter(input logic clk, +// input logic req, +// output logic done); - logic [7:0] count; +// logic [7:0] count; - // This block of control logic sequences the divider - // through its iterations. You may modify it if you - // build a divider which completes in fewer iterations. - // You are not responsible for the (trivial) circuit - // design of the block. +// // This block of control logic sequences the divider +// // through its iterations. You may modify it if you +// // build a divider which completes in fewer iterations. +// // You are not responsible for the (trivial) circuit +// // design of the block. - always @(posedge clk) - begin - if (count == `DIVLEN + 2) done <= #1 1; - else if (done | req) done <= #1 0; - if (req) count <= #1 0; - else count <= #1 count+1; - end -endmodule +// always @(posedge clk) +// begin +// if (count == `DIVLEN + 2) done <= #1 1; +// else if (done | req) done <= #1 0; +// if (req) count <= #1 0; +// else count <= #1 count+1; +// end +// endmodule /////////// // clock // @@ -39,24 +39,27 @@ endmodule // testbench // ////////// module testbench; - logic clk; - logic req; - logic done; - logic [63:0] a, b; - logic [51:0] afrac, bfrac; - logic [10:0] aExp, bExp; - logic asign, bsign; - logic [51:0] r, rOTFC; - logic [`DIVLEN-1:0] Quot, QuotOTFC; - logic [54:0] rp, rm; // positive quotient digits + logic clk; + logic req; + logic done; + logic Int; + logic [`XLEN-1:0] a, b; + logic [`NF-1:0] afrac, bfrac; + logic [`NE-1:0] aExp, bExp; + logic asign, bsign; + logic [`NF-1:0] r; + logic [`XLEN-1:0] rInt; + logic [`DIVLEN-2:0] Quot; // Test parameters parameter MEM_SIZE = 40000; - parameter MEM_WIDTH = 64+64+64; + parameter MEM_WIDTH = 64+64+64+64; - `define memr 63:0 - `define memb 127:64 - `define mema 191:128 + // Test sizes + `define memrem 63:0 + `define 
memr 127:64 + `define memb 191:128 + `define mema 255:192 // Test logicisters logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file @@ -67,18 +70,22 @@ module testbench; logic rsign; integer testnum, errors; + // Equip Int test or Sqrt test + assign Int = 1'b0; + assign Sqrt = 1'b1; + // Divider srt srt(.clk, .Start(req), .Stall(1'b0), .Flush(1'b0), .XExp(aExp), .YExp(bExp), .rExp, .XSign(asign), .YSign(bsign), .rsign, .SrcXFrac(afrac), .SrcYFrac(bfrac), - .SrcA('0), .SrcB('0), .Fmt(2'b00), - .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), - .Quot, .QuotOTFC, .Rem(), .Flags()); + .SrcA(a), .SrcB(b), .Fmt(2'b00), + .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, + .Quot, .Rem(), .Flags(), .done); // Counter - counter counter(clk, req, done); + // counter counter(clk, req, done); initial @@ -94,63 +101,83 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("testvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; b = Vec[`memb]; {bsign, bExp, bfrac} = b; nextr = Vec[`memr]; - r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; - rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)]; + r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; + rInt = {1'b1, Quot}; req <= #5 1; end // Apply directed test vectors read from file. 
- always @(posedge clk) - begin - r = Quot[(`DIVLEN - 1):(`DIVLEN - 52)]; - rOTFC = QuotOTFC[(`DIVLEN - 1):(`DIVLEN - 52)]; - if (done) - begin - req <= #5 1; - diffp = correctr[51:0] - r; - diffn = r - correctr[51:0]; - if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp - begin - errors = errors+1; - $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); - $display("failed\n"); - $stop; - end - if (r !== rOTFC) // Check if OTFC works - begin - errors = errors+1; - $display("OTFC is %h, should be %h\n", rOTFC, r); - $display("failed\n"); - // $stop; + always @(posedge clk) begin + r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; + rInt = {1'b1, Quot}; + if (done) begin + if (~Int & ~Sqrt) begin + req <= #5 1; + diffp = correctr[51:0] - r; + diffn = r - correctr[51:0]; + if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors+1; + $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); + $display("failed\n"); + $stop; + end + if (afrac === 52'hxxxxxxxxxxxxx) + begin + $display("%d Tests completed successfully", testnum); + $stop; + end + end else if (~Sqrt) begin + req <= #5 1; + diffp = correctr[63:0] - rInt; + diffn = rInt - correctr[63:0]; + if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors+1; + $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp); + $display("failed\n"); + $stop; + end + if (afrac === 52'hxxxxxxxxxxxxx) + begin + $display("%d Tests completed successfully", testnum); + $stop; + end + end else begin + req <= #5 1; + diffp = correctr[51:0] - r; + diffn = r - correctr[51:0]; + if ((rExp !== correctr[62:52]) | 
($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + begin + errors = errors + 1; + $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); + $display("failed\n"); + end + if (afrac === 52'hxxxxxxxxxxxxx) begin + $display("%d Tests completed successfully", testnum-errors); + $stop; end end - if (afrac === 52'hxxxxxxxxxxxxx) - begin - $display("%d Tests completed successfully", testnum); - $stop; - end - end - if (req) - begin - req <= #5 0; - correctr = nextr; - testnum = testnum+1; - Vec = Tests[testnum]; - $display("a = %h b = %h",a,b); - a = Vec[`mema]; - {asign, aExp, afrac} = a; - b = Vec[`memb]; - {bsign, bExp, bfrac} = b; - nextr = Vec[`memr]; - end end - + if (req) begin + req <= #5 0; + correctr = nextr; + testnum = testnum+1; + Vec = Tests[testnum]; + $display("a = %h b = %h",a,b); + a = Vec[`mema]; + {asign, aExp, afrac} = a; + b = Vec[`memb]; + {bsign, bExp, bfrac} = b; + nextr = Vec[`memr]; + end + end endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index bbe045972..b90c3d3de 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -1,4 +1,31 @@ - +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: Testbench for Testfloat +// +// A component of the Wally configurable RISC-V project. 
+// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" `include "tests-fp.vh" @@ -49,28 +76,35 @@ module testbenchfp; logic XZero, YZero, ZZero; // is the input zero logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) - logic IntZeroE; + logic IntZero; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`DIVLEN+2:0] Quot; + logic [`QLEN-1-(`RADIX/4):0] Quot; logic CvtResDenormUfE; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2; - logic DivStart, DivDone; - + logic [`DURLEN-1:0] EarlyTermShift; + logic DivStart, DivBusy; + logic reset = 1'b0; + logic [`DIVLEN-1:0] DivX; + logic [`DIVLEN-1:0] Dpreproc; + logic [`DIVLEN+3:0] NextWSN, WS; + logic [`DIVLEN+3:0] NextWCN, WC; + logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; + logic [`DURLEN-1:0] Dur; // in-between FMA signals logic Mult; - logic [`NE+1:0] ProdExpE; - logic AddendStickyE; - logic KillProdE; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE; - logic [3*`NF+5:0] SumE; - logic InvZE; - logic NegSumE; - logic ZSgnEffE; - logic PSgnE; + logic [`NE+1:0] Pe; + logic ZmSticky; + logic KillProd; + logic [$clog2(3*`NF+7)-1:0] NCnt; + logic [3*`NF+5:0] Sm; + logic InvA; + logic NegSum; + logic As; + logic Ps; logic DivSticky; + logic DivDone; logic DivNegSticky; logic [`NE+1:0] DivCalcExp; @@ -637,34 +671,35 @@ module testbenchfp; /////////////////////////////////////////////////////////////////////////////////////////////// // instantiate devices under test - fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), - .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), - .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), - .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE, - .ProdExpE, 
.AddendStickyE, .KillProdE); + fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), + .Xe(XExp), .Ye(YExp), .Ze(ZExp), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), + .XZero, .YZero, .ZZero, + .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, + .Pe, .ZmSticky, .KillProd); - postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp), - .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky), - .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .DivNegStickyM(DivNegSticky), - .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), - .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal), - .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE), - .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), - .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), - .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes)); + postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), + .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), + .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), + .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), + .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), 
.Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); - fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), - .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE, - .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE); + fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, + .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), - .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem()); - + divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), + .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp), + .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone); + assign CmpFlg[3:0] = 0; // produce clock @@ -818,7 +853,7 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & 
(ResFlg === AnsFlg | AnsFlg === 5'bx))&((~DivStart&DivDone)^~(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -841,7 +876,7 @@ end $stop; end - if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector + if(~(DivBusy|DivStart)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file @@ -922,7 +957,7 @@ module readvectors ( end Ans = TestVector[8+(`Q_LEN-1):8]; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double if(OpCtrl === `FMA_OPCTRL) begin X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; @@ -937,7 +972,7 @@ module readvectors ( end Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single if(OpCtrl === `FMA_OPCTRL) begin X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; @@ -978,7 +1013,7 @@ module readvectors ( DivStart = 1'b1; #10 // one clk cycle DivStart = 1'b0; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; @@ -986,7 +1021,7 @@ module readvectors ( DivStart = 1'b1; #10 DivStart = 1'b0; end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single X = 
{{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; @@ -1010,12 +1045,12 @@ module readvectors ( Y = TestVector[12+(`Q_LEN)-1:12]; Ans = TestVector[8]; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double X = {{`FLEN-`D_LEN{1'b1}}, TestVector[12+2*(`D_LEN)-1:12+(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[12+(`D_LEN)-1:12]}; Ans = TestVector[8]; end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single X = {{`FLEN-`S_LEN{1'b1}}, TestVector[12+2*(`S_LEN)-1:12+(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[12+(`S_LEN)-1:12]}; Ans = TestVector[8]; @@ -1034,7 +1069,7 @@ module readvectors ( X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]}; Ans = TestVector[8+(`Q_LEN-1):8]; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; end @@ -1048,7 +1083,7 @@ module readvectors ( end endcase end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double case (OpCtrl[1:0]) 2'b11: begin // quad X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`Q_LEN-1:8+(`Q_LEN)]}; @@ -1068,13 +1103,13 @@ module readvectors ( end endcase end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single case (OpCtrl[1:0]) 2'b11: begin // quad X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`Q_LEN-1:8+(`Q_LEN)]}; Ans = TestVector[8+(`Q_LEN-1):8]; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`D_LEN-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; end @@ -1094,11 +1129,11 @@ module readvectors ( X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`Q_LEN-1:8+(`Q_LEN)]}; Ans = TestVector[8+(`Q_LEN-1):8]; end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double X = {{`FLEN-`H_LEN{1'b1}}, 
TestVector[8+`H_LEN+`D_LEN-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`S_LEN-1:8+(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; end @@ -1138,7 +1173,7 @@ module readvectors ( end endcase end - 2'b01: begin // double + 2'b01: if (`D_SUPPORTED)begin // double // {Int->Fp?, is the integer a long} casex ({OpCtrl[2:1]}) 2'b11: begin // long -> double @@ -1164,7 +1199,7 @@ module readvectors ( end endcase end - 2'b00: begin // single + 2'b00: if (`S_SUPPORTED)begin // single // {is the integer a long, is the opperation to an integer} casex ({OpCtrl[2:1]}) 2'b11: begin // long -> single diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 1f4f70a08..0fb5f5e60 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -55,6 +55,7 @@ logic [3:0] dummy; logic HREADYEXT, HRESPEXT; logic [31:0] HADDR; logic [`AHBW-1:0] HWDATA; + logic [`XLEN/8-1:0] HWSTRB; logic HWRITE; logic [2:0] HSIZE; logic [2:0] HBURST; @@ -113,6 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; + // "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; @@ -122,6 +124,7 @@ logic [3:0] dummy; "wally32priv": tests = wally32priv; "wally32periph": tests = wally32periph; "embench": tests = embench; + "coremark": tests = coremark; endcase end if (tests.size() == 0) begin @@ -154,7 +157,7 @@ logic [3:0] dummy; assign HRDATAEXT = 0; wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, - .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, + .HCLK, .HRESETn, .HADDR, .HWDATA, 
.HWSTRB, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); @@ -283,21 +286,12 @@ logic [3:0] dummy; if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.dtim.ram.memory.RAM[testadrNoBase+i]; else sig = dut.uncore.ram.ram.memory.RAM[testadrNoBase+i]; //$display("signature[%h] = %h sig = %h", i, signature[i], sig); - if (signature[i] !== sig & - //if (signature[i] !== dut.core.lsu.dtim.ram.memory.RAM[testadr+i] & - (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1? - if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin - // if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin - // report errors unless they are garbage at the end of the sim - // kind of hacky test for garbage right now - $display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx); - errors = errors+1; - $display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", - tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]); - // tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.memory.RAM[testadr+i], signature[i]); - $stop;//***debug - end - end + if (signature[i] !== sig & (signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin + errors = errors+1; + $display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h", + tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]); + $stop;//***debug + end i = i + 1; end /* verilator lint_on INFINITELOOP */ diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 79f1c760b..4b1b9a160 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,9 +34,9 @@ string tvpaths[] = '{ 
"../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", + "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", // "../../tests/imperas-riscv-tests/work/", - "../../benchmarks/riscv-coremark/work/", + "../../benchmarks/coremark/work/", "../../addins/embench-iot/" }; @@ -203,16 +203,16 @@ string imperas32f[] = '{ "rv32i_m/F/FCVT-WU-S-RNE-01", "rv32i_m/F/FCVT-WU-S-RTZ-01", "rv32i_m/F/FCVT-WU-S-RUP-01", - // "rv32i_m/F/FDIV-S-DYN-RDN-01", - // "rv32i_m/F/FDIV-S-DYN-RMM-01", - // "rv32i_m/F/FDIV-S-DYN-RNE-01", - // "rv32i_m/F/FDIV-S-DYN-RTZ-01", - // "rv32i_m/F/FDIV-S-DYN-RUP-01", - // "rv32i_m/F/FDIV-S-RDN-01", - // "rv32i_m/F/FDIV-S-RMM-01", - // "rv32i_m/F/FDIV-S-RNE-01", - // "rv32i_m/F/FDIV-S-RTZ-01", - // "rv32i_m/F/FDIV-S-RUP-01", + "rv32i_m/F/FDIV-S-DYN-RDN-01", + "rv32i_m/F/FDIV-S-DYN-RMM-01", + "rv32i_m/F/FDIV-S-DYN-RNE-01", + "rv32i_m/F/FDIV-S-DYN-RTZ-01", + "rv32i_m/F/FDIV-S-DYN-RUP-01", + "rv32i_m/F/FDIV-S-RDN-01", + "rv32i_m/F/FDIV-S-RMM-01", + "rv32i_m/F/FDIV-S-RNE-01", + "rv32i_m/F/FDIV-S-RTZ-01", + "rv32i_m/F/FDIV-S-RUP-01", "rv32i_m/F/FEQ-S-01", "rv32i_m/F/FLE-S-01", "rv32i_m/F/FLT-S-01", @@ -390,16 +390,16 @@ string imperas32f[] = '{ "rv64i_m/F/FCVT-WU-S-RNE-01", "rv64i_m/F/FCVT-WU-S-RTZ-01", "rv64i_m/F/FCVT-WU-S-RUP-01", - // "rv64i_m/F/FDIV-S-DYN-RDN-01", - // "rv64i_m/F/FDIV-S-DYN-RMM-01", - // "rv64i_m/F/FDIV-S-DYN-RNE-01", - // "rv64i_m/F/FDIV-S-DYN-RTZ-01", - // "rv64i_m/F/FDIV-S-DYN-RUP-01", - // "rv64i_m/F/FDIV-S-RDN-01", - // "rv64i_m/F/FDIV-S-RMM-01", - // "rv64i_m/F/FDIV-S-RNE-01", - // "rv64i_m/F/FDIV-S-RTZ-01", - // "rv64i_m/F/FDIV-S-RUP-01", + "rv64i_m/F/FDIV-S-DYN-RDN-01", + "rv64i_m/F/FDIV-S-DYN-RMM-01", + "rv64i_m/F/FDIV-S-DYN-RNE-01", + "rv64i_m/F/FDIV-S-DYN-RTZ-01", + "rv64i_m/F/FDIV-S-DYN-RUP-01", + "rv64i_m/F/FDIV-S-RDN-01", + "rv64i_m/F/FDIV-S-RMM-01", + 
"rv64i_m/F/FDIV-S-RNE-01", + "rv64i_m/F/FDIV-S-RTZ-01", + "rv64i_m/F/FDIV-S-RUP-01", "rv64i_m/F/FEQ-S-01", "rv64i_m/F/FLE-S-01", "rv64i_m/F/FLT-S-01", @@ -570,16 +570,16 @@ string imperas32f[] = '{ "rv64i_m/D/FCVT-WU-D-RNE-01", "rv64i_m/D/FCVT-WU-D-RTZ-01", "rv64i_m/D/FCVT-WU-D-RUP-01", - // "rv64i_m/D/FDIV-D-DYN-RDN-01", - // "rv64i_m/D/FDIV-D-DYN-RMM-01", - // "rv64i_m/D/FDIV-D-DYN-RNE-01", - // "rv64i_m/D/FDIV-D-DYN-RTZ-01", - // "rv64i_m/D/FDIV-D-DYN-RUP-01", - // "rv64i_m/D/FDIV-D-RDN-01", - // "rv64i_m/D/FDIV-D-RMM-01", - // "rv64i_m/D/FDIV-D-RNE-01", - // "rv64i_m/D/FDIV-D-RTZ-01", - // "rv64i_m/D/FDIV-D-RUP-01", + "rv64i_m/D/FDIV-D-DYN-RDN-01", + "rv64i_m/D/FDIV-D-DYN-RMM-01", + "rv64i_m/D/FDIV-D-DYN-RNE-01", + "rv64i_m/D/FDIV-D-DYN-RTZ-01", + "rv64i_m/D/FDIV-D-DYN-RUP-01", + "rv64i_m/D/FDIV-D-RDN-01", + "rv64i_m/D/FDIV-D-RMM-01", + "rv64i_m/D/FDIV-D-RNE-01", + "rv64i_m/D/FDIV-D-RTZ-01", + "rv64i_m/D/FDIV-D-RUP-01", "rv64i_m/D/FEQ-D-01", "rv64i_m/D/FLD-01", "rv64i_m/D/FLE-D-01", @@ -1119,17 +1119,17 @@ string imperas32f[] = '{ "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S/ref/Ref", "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S/ref/Ref", "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref", - // "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref", + 
"rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref", + "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref", "rv64i_m/D/src/d_feq_b1-01.S/ref/Ref", "rv64i_m/D/src/d_feq_b19-01.S/ref/Ref", "rv64i_m/D/src/d_fle_b1-01.S/ref/Ref", @@ -1291,17 +1291,17 @@ string imperas32f[] = '{ "rv32i_m/F/src/fcvt.wu.s_b27-01.S/ref/Ref", "rv32i_m/F/src/fcvt.wu.s_b28-01.S/ref/Ref", "rv32i_m/F/src/fcvt.wu.s_b29-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref", - // "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref", + "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref", "rv32i_m/F/src/feq_b1-01.S/ref/Ref", "rv32i_m/F/src/feq_b19-01.S/ref/Ref", "rv32i_m/F/src/fle_b1-01.S/ref/Ref", @@ -1662,6 +1662,12 @@ string wally32i[] = '{ // "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref" // }; + + string wally32d[] = '{ + `WALLYTEST, + "rv32i_m/D/src/WALLY-fld.S/ref/Ref" + }; + // string wally32i[] = '{ // `WALLYTEST, // "rv32i_m/I/src/WALLY-ADD.S/ref/Ref", diff --git a/synthDC/Makefile b/synthDC/Makefile index 611dcfef9..98b719428 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -1,17 +1,20 @@ # -# Makefile for synthesis +# Makefile for synthesis # Shreya Sanghai (ssanghai@hmc.edu) 
2/28/2022 +# Madeleine Masser-Frye (mmasserfrye@hmc.edu) 7/8/2022 NAME := synth # defaults export DESIGN ?= wallypipelinedcore -export FREQ ?= 3402 +export FREQ ?= 3000 export CONFIG ?= rv32e -# sky130 and sky90 presently supported -export TECH ?= tsmc28 +# title to add a note in the synth's directory name +TITLE = +# tsmc28, sky130, and sky90 presently supported +export TECH ?= sky90 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel -export MAXCORES ?= 4 +export MAXCORES ?= 1 # MAXOPT turns on flattening, boundary optimization, and retiming # The output netlist is hard to interpret, but significantly better PPA export MAXOPT ?= 0 @@ -19,20 +22,14 @@ export DRIVE ?= FLOP time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) -export OUTPUTDIR := newRuns/$(DESIGN)_$(CONFIG)_$(TECH)nm_$(FREQ)_MHz_$(time)_$(hash) +export OUTPUTDIR := runs/$(DESIGN)_$(CONFIG)_$(TECH)nm_$(FREQ)_MHz_$(time)_$(TITLE)_$(hash) export SAIFPOWER ?= 0 CONFIGDIR ?= ${WALLY}/pipelined/config CONFIGFILES ?= $(shell find $(CONFIGDIR) -name rv*_*) CONFIGFILESTRIM = $(notdir $(CONFIGFILES)) # FREQS = 25 50 100 150 200 250 300 350 400 -k = 3 6 - -ifeq ($(TECH), sky130) - FREQS = 25 50 100 150 200 250 300 350 400 -else ifeq ($(TECH), sky90) - FREQS = 500 550 600 650 700 750 800 850 900 950 1000 -endif +# k = 3 6 print: @echo $(FREQS) @@ -40,39 +37,37 @@ print: default: - @echo "Basic synthesis procedure for Wally:" - @echo " Invoke with make synth" - -test: rv% - echo "Running test on $<" - -rv%.log: rv% - echo $< + @echo " Basic synthesis procedure for Wally:" + @echo " Invoke with make synth" + @echo "Use wallySynth.py to run a concurrent sweep " -DIRS = rv64gc rv32e rv32gc rv64ic rv32ic -# DELDIRS = rv32e rv32gc rv64ic rv64gc rv32ic -# CONFIGSUBDIRS = _FPUoff _noMulDiv _noVirtMem _PMP0 _PMP16 _orig +DIRS32 = rv32e rv32gc rv32ic +DIRS64 = rv64ic rv64gc +DIRS = $(DIRS32) $(DIRS64) + # 
bpred: # @$(foreach kval, $(k), rm -rf $(CONFIGDIR)/rv64gc_bpred_$(kval);) # @$(foreach kval, $(k), cp -r $(CONFIGDIR)/rv64gc $(CONFIGDIR)/rv64gc_bpred_$(kval);) # @$(foreach kval, $(k), sed -i 's/BPRED_SIZE.*/BPRED_SIZE $(kval)/g' $(CONFIGDIR)/rv64gc_bpred_$(kval)/wally-config.vh;) # @$(foreach kval, $(k), make synth DESIGN=wallypipelinedcore CONFIG=rv64gc_bpred_$(kval) TECH=sky90 FREQ=500 MAXCORES=4 --jobs;) copy: + # remove old config files + rm -rf $(CONFIGDIR)/*_* + @$(foreach dir, $(DIRS), rm -rf $(CONFIGDIR)/$(dir)_orig;) @$(foreach dir, $(DIRS), cp -r $(CONFIGDIR)/$(dir) $(CONFIGDIR)/$(dir)_orig;) @$(foreach dir, $(DIRS), sed -i 's/WAYSIZEINBYTES.*/WAYSIZEINBYTES 512/g' $(CONFIGDIR)/$(dir)_orig/wally-config.vh;) @$(foreach dir, $(DIRS), sed -i 's/NUMWAYS.*/NUMWAYS 1/g' $(CONFIGDIR)/$(dir)_orig/wally-config.vh;) - @$(foreach dir, $(DIRS), sed -i "s/RAM_RANGE.*/RAM_RANGE 34\'h01FF/g" $(CONFIGDIR)/$(dir)_orig/wally-config.vh ;) - @$(foreach dir, $(DIRS), sed -i 's/BPRED_SIZE.*/BPRED_SIZE 5/g' $(CONFIGDIR)/$(dir)_orig/wally-config.vh;) + @$(foreach dir, $(DIRS), sed -i 's/BPRED_SIZE.*/BPRED_SIZE 4/g' $(CONFIGDIR)/$(dir)_orig/wally-config.vh;) - -del: - rm -rf $(CONFIGDIR)/*_* + @$(foreach dir, $(DIRS32), sed -i "s/RAM_RANGE.*/RAM_RANGE 34\'h01FF/g" $(CONFIGDIR)/$(dir)_orig/wally-config.vh ;) + @$(foreach dir, $(DIRS64), sed -i "s/RAM_RANGE.*/RAM_RANGE 56\'h01FF/g" $(CONFIGDIR)/$(dir)_orig/wally-config.vh ;) configs: $(DIRS) -$(DIRS): - #turn off FPU +$(DIRS): + + # turn off FPU rm -rf $(CONFIGDIR)/$@_FPUoff cp -r $(CONFIGDIR)/$@_orig $(CONFIGDIR)/$@_FPUoff sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/$@_FPUoff/wally-config.vh @@ -88,12 +83,12 @@ $(DIRS): cp -r $(CONFIGDIR)/$@_FPUoff $(CONFIGDIR)/$@_PMP0 sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES 0/' $(CONFIGDIR)/$@_PMP0/wally-config.vh - #no muldiv + # no muldiv rm -rf $(CONFIGDIR)/$@_noMulDiv cp -r $(CONFIGDIR)/$@_PMP0 $(CONFIGDIR)/$@_noMulDiv sed -i 's/1 *<< *12/0 << 12/' 
$(CONFIGDIR)/$@_noMulDiv/wally-config.vh - #no priv + # no priv rm -rf $(CONFIGDIR)/$@_noPriv cp -r $(CONFIGDIR)/$@_noMulDiv $(CONFIGDIR)/$@_noPriv sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED 0/' $(CONFIGDIR)/$@_noPriv/wally-config.vh @@ -101,16 +96,10 @@ $(DIRS): freqs: @$(foreach freq, $(FREQS), make synth DESIGN=wallypipelinedcore CONFIG=rv32e FREQ=$(freq) MAXCORES=1;) -allsynth: $(CONFIGFILESTRIM) - -$(CONFIGFILESTRIM): - make synth DESIGN=wallypipelinedcore CONFIG=$@ TECH=sky90 FREQ=1000 MAXCORES=1 - - synth: @echo "DC Synthesis" - @mkdir -p hdl/ @mkdir -p $(OUTPUTDIR) + @mkdir -p $(OUTPUTDIR)/hdl @mkdir -p $(OUTPUTDIR)/reports @mkdir -p $(OUTPUTDIR)/mapped @mkdir -p $(OUTPUTDIR)/unmapped @@ -118,10 +107,11 @@ ifeq ($(SAIFPOWER), 1) cp -f ../pipelined/regression/power.saif . endif dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out +# rm -rf $(OUTPUTDIR)/hdl + rm -rf $(OUTPUTDIR)/WORK + rm -rf $(OUTPUTDIR)/alib-52 clean: - rm -rf alib-52 WORK analyzed $(NAME).out - rm -f hdl/* rm -f default.svf rm -f command.log rm -f filenames*.log @@ -129,5 +119,5 @@ clean: rm -f Synopsys_stack_trace_*.txt rm -f crte_*.txt - - +fresh: clean copy configs + @echo "synth directory cleaned and fresh config files written" diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index a2f6a9b50..978365b16 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -7,7 +7,11 @@ import subprocess from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines -from wallySynth import testFreq +import numpy as np +from ppa.ppaAnalyze import noOutliers +from matplotlib import ticker +import argparse +import os def synthsintocsv(): @@ -27,7 +31,7 @@ def synthsintocsv(): writer.writerow(['Width', 'Config', 'Special', 'Tech', 'Target Freq', 'Delay', 'Area']) for oneSynth in allSynths: - descrip = specReg.findall(oneSynth) #[30:] + descrip = specReg.findall(oneSynth) width = descrip[2][:4] config = 
descrip[2][4:] if descrip[3][-2:] == 'nm': @@ -56,6 +60,7 @@ def synthsintocsv(): writer.writerow([width, config, special, tech, freq, delay, area]) file.close() + def synthsfromcsv(filename): Synth = namedtuple("Synth", "width config special tech freq delay area") with open(filename, newline='') as csvfile: @@ -71,28 +76,42 @@ def synthsfromcsv(filename): allSynths[i] = Synth(*allSynths[i]) return allSynths + def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, 'plots/wally') + if not os.path.exists(final_directory): + os.makedirs(final_directory) + freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & (oneSynth.special == ''): + if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special): ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] - f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) + allFreqs = list(flatten(freqsL)) + if allFreqs != []: + median = np.median(allFreqs) + else: + median = 0 for ind in [0,1]: areas = areasL[ind] delays = delaysL[ind] freqs = freqsL[ind] + freqs, delays, areas = noOutliers(median, freqs, delays, areas) c = 'blue' if ind else 'green' - ax1.scatter(freqs, delays, color=c) - ax2.scatter(freqs, areas, color=c) + targs = [1000/f for f in freqs] + + ax1.scatter(targs, delays, color=c) + ax2.scatter(targs, areas, color=c) freqs = list(flatten(freqsL)) delays = list(flatten(delaysL)) @@ -104,65 +123,143 @@ def freqPlot(tech, width, config): ax1.legend(handles=legend_elements) ytop = ax2.get_ylim()[1] ax2.set_ylim(ymin=0, ymax=1.1*ytop) - 
ax2.set_xlabel("Target Freq (MHz)") - ax1.set_ylabel('Delay (ns)') + ax2.set_xlabel("Target Cycle Time (ns)") + ax1.set_ylabel('Cycle Time Achieved (ns)') ax2.set_ylabel('Area (sq microns)') - ax1.set_title(tech + ' ' + width +config) + ax1.set_title(tech + ' ' + width + config) + ax2.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}')) + addFO4axis(fig, ax1, tech) + plt.savefig('./plots/wally/freqSweep_' + tech + '_' + width + config + '.png') - # plt.show() -def areaDelay(tech, freq, width=None, config=None, special=None): - delays, areas, labels = ([] for i in range(3)) - for oneSynth in allSynths: - if (width==None) or (width == oneSynth.width): - if (tech == oneSynth.tech) & (freq == oneSynth.freq): - if (special != None) & (oneSynth.special == special): - delays += [oneSynth.delay] - areas += [oneSynth.area] - labels += [oneSynth.width + oneSynth.config] - elif (config != None) & (oneSynth.config == config): - delays += [oneSynth.delay] - areas += [oneSynth.area] - labels += [oneSynth.special] - # else: - # delays += [oneSynth.delay] - # areas += [oneSynth.area] - # labels += [oneSynth.config + '_' + oneSynth.special] - if width == None: - width = '' +def areaDelay(tech, delays, areas, labels, fig, ax, norm=False): + + plt.subplots_adjust(left=0.18) + + fo4 = techdict[tech].fo4 + add32area = techdict[tech].add32area + marker = techdict[tech].shape + color = techdict[tech].color + + if norm: + delays = [d/fo4 for d in delays] + areas = [a/add32area for a in areas] - f, (ax1) = plt.subplots(1, 1) - plt.scatter(delays, areas) - plt.xlabel('Delay (ns)') + plt.scatter(delays, areas, marker=marker, color=color) + plt.xlabel('Cycle time (ns)') plt.ylabel('Area (sq microns)') - ytop = ax1.get_ylim()[1] + ytop = ax.get_ylim()[1] plt.ylim(ymin=0, ymax=1.1*ytop) - titleStr = tech + ' ' + width - saveStr = tech + '_' + width - if config: - titleStr += config - saveStr = saveStr + config + '_versions_' - if (special != None): - titleStr += special - saveStr = 
saveStr + '_origConfigs_' - saveStr += str(freq) - titleStr = titleStr + ' (target freq: ' + str(freq) + ')' - plt.title(titleStr) + + ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}')) for i in range(len(labels)): plt.annotate(labels[i], (delays[i], areas[i]), textcoords="offset points", xytext=(0,10), ha='center') - plt.savefig('./plots/wally/areaDelay_' + saveStr + '.png') - -# ending freq in 42 means fpu was turned off manually + return fig + + +def plotFeatures(tech, width, config): + delays, areas, labels = ([] for i in range(3)) + freq = techdict[tech].targfreq + for oneSynth in allSynths: + if (tech == oneSynth.tech) & (freq == oneSynth.freq): + if (oneSynth.config == config) & (width == oneSynth.width): + delays += [oneSynth.delay] + areas += [oneSynth.area] + labels += [oneSynth.special] + + fig, (ax) = plt.subplots(1, 1) + + fig = areaDelay(tech, delays, areas, labels, fig, ax) + + titlestr = tech+'_'+width+config + plt.title(titlestr) + plt.savefig('./plots/wally/features_'+titlestr+'.png') + + +def plotConfigs(tech, special=''): + delays, areas, labels = ([] for i in range(3)) + freq = techdict[tech].targfreq + for oneSynth in allSynths: + if (tech == oneSynth.tech) & (freq == oneSynth.freq) & (oneSynth.special == special): + delays += [oneSynth.delay] + areas += [oneSynth.area] + labels += [oneSynth.width + oneSynth.config] + + fig, (ax) = plt.subplots(1, 1) + + fig = areaDelay(tech, delays, areas, labels, fig, ax) + + titleStr = tech+'_'+special + plt.title(titleStr) + plt.savefig('./plots/wally/configs_' + titleStr + '.png') + + +def normAreaDelay(special=''): + fig, (ax) = plt.subplots(1, 1) + fullLeg = [] + for tech in list(techdict.keys()): + delays, areas, labels = ([] for i in range(3)) + spec = techdict[tech] + freq = spec.targfreq + for oneSynth in allSynths: + if (tech == oneSynth.tech) & (freq == oneSynth.freq) & (oneSynth.special == special): + delays += [oneSynth.delay] + areas += [oneSynth.area] + labels += 
[oneSynth.width + oneSynth.config] + areaDelay(tech, delays, areas, labels, fig, ax, norm=True) + fullLeg += [lines.Line2D([0], [0], markerfacecolor=spec.color, label=tech, marker=spec.shape, markersize=10, color='w')] + + ax.set_title('Normalized Area & Cycle Time by Configuration') + ax.set_xlabel('Cycle Time (FO4)') + ax.set_ylabel('Area (add32)') + ax.legend(handles = fullLeg, loc='upper left') + plt.savefig('./plots/wally/normAreaDelay.png') + + +def addFO4axis(fig, ax, tech): + fo4 = techdict[tech].fo4 + + ax3 = fig.add_axes((0.125,0.14,0.775,0.0)) + ax3.yaxis.set_visible(False) # hide the yaxis + + fo4Range = [x/fo4 for x in ax.get_xlim()] + dif = fo4Range[1] - fo4Range[0] + for n in [0.02, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]: + d = dif/n + if d > 3 and d < 10: + r = [int(x/n) for x in fo4Range] + nsTicks = [round(x*n, 2) for x in range(r[0], r[1]+1)] + break + new_tick_locations = [fo4*float(x) for x in nsTicks] + + ax3.set_xticks(new_tick_locations) + ax3.set_xticklabels(nsTicks) + ax3.set_xlim(ax.get_xlim()) + ax3.set_xlabel("FO4 delays") + plt.subplots_adjust(left=0.125, bottom=0.25, right=0.9, top=0.9) + if __name__ == '__main__': - # synthsintocsv() + + parser = argparse.ArgumentParser() + parser.add_argument("-s", "--skyfreq", type=int, default=3000, help = "Target frequency used for sky90 syntheses") + parser.add_argument("-t", "--tsmcfreq", type=int, default=10000, help = "Target frequency used for tsmc28 syntheses") + args = parser.parse_args() + + TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy") + techdict = {} + techdict['sky90'] = TechSpec('green', 'o', args.skyfreq, 43.2e-3, 1440.600027, 714.057, 0.658023) + techdict['tsmc28'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533) + + synthsintocsv() synthsfromcsv('Summary.csv') freqPlot('tsmc28', 'rv32', 'e') freqPlot('sky90', 'rv32', 'e') - areaDelay('tsmc28', testFreq[1], width= 'rv64', 
config='gc') - areaDelay('tsmc28', testFreq[1], special='') - areaDelay('sky90', testFreq[0], width='rv64', config='gc') - areaDelay('sky90', testFreq[0], special='') \ No newline at end of file + plotFeatures('sky90', 'rv64', 'gc') + plotFeatures('tsmc28', 'rv64', 'gc') + plotConfigs('sky90', special='orig') + plotConfigs('tsmc28', special='orig') + normAreaDelay(special='orig') diff --git a/synthDC/oldBestSynths.csv b/synthDC/oldBestSynths.csv deleted file mode 100644 index 03ac0764f..000000000 --- a/synthDC/oldBestSynths.csv +++ /dev/null @@ -1,113 +0,0 @@ -Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (fJ) -priorityencoder,sky90,8,7994,0.12495882036527395,60.760001,44.346,13.42057730723042 -priorityencoder,sky90,16,5761,0.16976997552508244,136.220003,77.243,21.28915493084534 -priorityencoder,sky90,32,4776,0.20887023450586265,379.260006,246.78,50.06619521105528 -priorityencoder,sky90,64,4096,0.244021625,794.780014,364.853,72.71844425000002 -priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,126.1332569785861 -add,sky90,8,3652,0.2733695629791895,245.000005,139.276,101.6934774282585 -add,sky90,16,2931,0.33991248447628797,623.280012,352.919,268.5308627362675 -add,sky90,32,2420,0.4132191404958678,1330.840024,582.809,520.6561170247934 -add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,939.1435764188874 -add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2147.9741690795754 -csa,sky90,8,5740,0.16671402787456446,290.080006,207.654,143.04063591637635 -csa,sky90,16,5984,0.16522529946524064,588.000011,322.135,321.19798216042784 -csa,sky90,32,5740,0.16671402787456446,1160.320023,826.559,570.4954033867597 -csa,sky90,64,5984,0.16522529946524064,2469.600048,1440.0,1354.3517797165773 -csa,sky90,128,5984,0.16522529946524064,4897.060095,2990.0,2649.0572263262034 -shiftleft,sky90,8,4321,0.23108991020597083,250.880004,181.951,70.25133270261513 -shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,195.51397310283156 
-shiftleft,sky90,32,2500,0.39945200000000003,1400.420023,738.137,368.29474400000004 -shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1144.633567988198 -shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2900.3935725432616 -comparator,sky90,8,4829,0.2066692116380203,198.940004,136.459,48.56726473493477 -comparator,sky90,16,4014,0.24886605181863478,355.740006,188.666,62.714245058295965 -comparator,sky90,32,3596,0.27763876307007784,697.760013,316.793,109.38967264961067 -comparator,sky90,64,3129,0.31954192361776923,1372.980026,508.393,204.82637303899006 -comparator,sky90,128,2682,0.37267507755406415,2836.120055,772.571,463.6077964772558 -flop,sky90,8,10,0.1143419999999935,133.279999,64.8145,0.22163481569998741 -flop,sky90,16,10,0.1143419999999935,266.5599975,129.629,0.4426750529999749 -flop,sky90,32,10,0.1143419999999935,533.119995,259.258,0.88306326599995 -flop,sky90,64,10,0.1143419999999935,1066.23999,520.0,1.7717864609998994 -flop,sky90,128,10,0.1143419999999935,2132.4799805,1035.0,3.537741479999799 -mux2,sky90,1,11806,0.08300869354565475,13.72,12.3,3.8183999031001186 -mux2,sky90,8,5280,0.1887229393939394,63.700001,23.506,19.476207345454547 -mux2,sky90,16,4815,0.20207331983385254,119.560002,32.354,37.76750347694705 -mux2,sky90,32,5000,0.19989700000000002,374.360008,259.372,136.72954800000002 -mux2,sky90,64,4060,0.24566741871921183,514.50001,165.954,163.6145008669951 -mux2,sky90,128,4004,0.24974824975024976,1302.420025,767.078,466.52973053346653 -mux4,sky90,1,7687,0.12838276193573567,28.420001,22.994,6.3164318872381955 -mux4,sky90,8,4655,0.21455177121374866,159.740002,86.462,42.03069198077337 -mux4,sky90,16,4452,0.22313914914645103,392.0,398.313,103.09028690566036 -mux4,sky90,32,3802,0.2622634634402946,465.500009,150.568,139.26189908679646 -mux4,sky90,64,3699,0.2695173360367667,877.100017,304.149,274.9076827575021 -mux4,sky90,128,3166,0.3157249696778269,1984.500039,725.267,569.5678452987997 
-mux8,sky90,1,5763,0.17009673572791947,70.560001,49.874,12.31500366670137 -mux8,sky90,8,3577,0.2789168803466592,287.140006,116.648,60.83177160360637 -mux8,sky90,16,3419,0.2915101822170225,588.000006,280.193,150.71076420620065 -mux8,sky90,32,3155,0.3146512107765452,1237.740008,639.983,323.14679346751194 -mux8,sky90,64,3020,0.33032882781456957,2207.940042,730.503,445.61358872185434 -mux8,sky90,128,2666,0.37501377344336084,3761.240072,1460.0,854.281375903976 -mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1420.996059801527 -mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6375.600754155466 -mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24931.79089954522 -mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88845.84517534176 -mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273311.87793918326 -mux2d,sky90,1,13217,0.07565913467503972,19.6,18.562,6.03759894706817 -mux4d,sky90,1,9701,0.10307715647871353,51.940001,49.18,13.626800086485927 -mux8d,sky90,1,7099,0.1341249105507818,85.260001,40.078,14.405015393153965 -priorityencoder,tsmc28,8,31306,0.03191275857663067,8.316,34.836,1.713715135565067 -priorityencoder,tsmc28,16,21202,0.04705136175832468,21.294,73.912,3.815865438600132 -priorityencoder,tsmc28,32,16453,0.060740189205615996,62.118,205.801,9.439025402552724 -priorityencoder,tsmc28,64,13786,0.07244435673872045,137.088001,428.365,18.328422254896275 -priorityencoder,tsmc28,128,11439,0.0874122290410001,315.252,980.365,40.908923191188045 -add,tsmc28,8,13787,0.07226709545223761,33.012,176.194,12.328766484151734 -add,tsmc28,16,11520,0.08680155555555555,90.972001,475.452,33.67900355555555 -add,tsmc28,32,9810,0.1019177991845056,209.286002,1060.0,81.43232154841998 -add,tsmc28,64,8203,0.12186861952944045,392.616003,1800.0,142.34254761038645 -add,tsmc28,128,7210,0.13869425520110956,868.140006,4090.0,331.3405756754508 -csa,tsmc28,8,23865,0.04077636748376283,49.392,473.393,20.91827651917033 
-csa,tsmc28,16,23865,0.04077636748376283,98.783999,946.879,41.75500030337314 -csa,tsmc28,32,23865,0.04077636748376283,197.567999,1890.0,83.30611876932745 -csa,tsmc28,64,23865,0.04077636748376283,395.135998,3790.0,166.5306848036874 -csa,tsmc28,128,23865,0.04077636748376283,790.271996,7570.0,333.1021459748586 -shiftleft,tsmc28,8,15183,0.06578013640255549,48.384,333.876,15.51753417736284 -shiftleft,tsmc28,16,11800,0.0847177627118644,130.788,613.549,33.71766955932203 -shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,101.80062296359652 -shiftleft,tsmc28,64,8269,0.12088260744951022,967.427998,4980.0,272.83204501354453 -shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,566.543120162039 -comparator,tsmc28,8,17054,0.05854826984871585,32.256,160.477,8.752966342383019 -comparator,tsmc28,16,13709,0.07280278080093369,48.132,204.944,11.852292714392004 -comparator,tsmc28,32,12136,0.08238147264337507,146.16,623.674,35.50641470929466 -comparator,tsmc28,64,10862,0.09205807659731172,291.312,1240.0,69.41178975437303 -comparator,tsmc28,128,9371,0.10671119720414043,558.432,2400.0,127.9467254477644 -flop,tsmc28,8,10,0.048889000000002625,15.12,78.6345,0.013320296940000717 -flop,tsmc28,16,10,0.048889000000002625,30.24,157.29,0.026541838100001425 -flop,tsmc28,32,10,0.048889000000002625,60.4799995,314.5805,0.05332812120000287 -flop,tsmc28,64,10,0.048889000000002625,120.959999,630.0,0.10640935295000573 -flop,tsmc28,128,10,0.048889000000002625,241.919998,1260.0,0.21305826200001143 -mux2,tsmc28,1,50000,0.019658000000000002,2.142,15.112,0.5917058000000001 -mux2,tsmc28,8,29041,0.033768075961571574,16.884,113.726,5.335356001928308 -mux2,tsmc28,16,19059,0.05221864998163597,15.75,88.448,5.133093293194816 -mux2,tsmc28,32,17903,0.05585556035301346,32.130001,171.146,9.897605294553983 -mux2,tsmc28,64,18546,0.05385698274560552,90.846,517.414,27.359347234767604 -mux2,tsmc28,128,16594,0.0601057455706882,184.968,1150.0,58.603101931421 
-mux4,tsmc28,1,26255,0.03808798324128737,5.292,41.928,1.7101504475338032 -mux4,tsmc28,8,18130,0.05509219801434087,27.971999,133.963,8.021424030888031 -mux4,tsmc28,16,16440,0.06065625060827251,39.438,185.149,12.373875124087593 -mux4,tsmc28,32,15168,0.0658052700421941,69.174,324.969,23.229260324894515 -mux4,tsmc28,64,13915,0.07180589399928135,137.465999,648.086,45.59674268954365 -mux4,tsmc28,128,13089,0.07639603056001222,296.603997,1440.0,94.50188980273512 -mux8,tsmc28,1,16320,0.05991150980392156,7.182,38.342,1.8428780415686272 -mux8,tsmc28,8,12885,0.07750962359332557,44.856,215.13,11.90547818393481 -mux8,tsmc28,16,12256,0.08154268929503918,121.841998,521.624,25.93057519582246 -mux8,tsmc28,32,11695,0.08537362676357418,168.21,815.694,46.35787933262078 -mux8,tsmc28,64,11000,0.0907930909090909,304.037999,1490.0,81.89536799999999 -mux8,tsmc28,128,10464,0.09547474923547401,664.775992,2850.0,153.04602302446486 -mult,tsmc28,8,5000,0.19998100000000002,444.150001,3260.0,306.970835 -mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1455.3214569227544 -mult,tsmc28,32,2973,0.3363555785401951,5141.430011,36900.0,5416.333881232761 -mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18545.980779602512 -mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50011.4036139272 -mux2d,tsmc28,1,51887,0.018931650182126544,3.276,26.574,0.9106123737602868 -mux4d,tsmc28,1,32558,0.03008041734750292,4.158,30.464,1.2543534033908719 -mux8d,tsmc28,1,21936,0.045586162654996355,20.664,171.151,6.614552201239972 diff --git a/synthDC/oldPpaData.csv b/synthDC/oldPpaData.csv deleted file mode 100644 index 08cdb35fb..000000000 --- a/synthDC/oldPpaData.csv +++ /dev/null @@ -1,2006 +0,0 @@ -Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (fJ) -shiftleft,sky90,128,2022,0.5271028417408506,10014.620149,7320.0,3350.265662104847 -priorityencoder,tsmc28,64,14337,0.0796055989398061,127.89,410.885,19.264554943433076 
-flop,sky90,64,8396,0.11434433539780849,1066.23999,520.0,1486.4191880038113 -mux4,sky90,128,3489,0.3113490759529951,2107.000041,761.758,639.5110020074519 -priorityencoder,sky90,32,5000,0.209401,370.440007,222.189,46.5917225 -mux2d,tsmc28,1,60000,0.018931666666666666,3.276,26.574,1.0526006666666667 -mux2,sky90,128,4087,0.2551632481037436,1146.600021,479.203,403.4130952520186 -mult,tsmc28,16,3741,0.2673062063619353,1458.072003,10500.0,1321.8291904597702 -comparator,sky90,8,4457,0.2225191655822302,175.420003,102.05,44.214558201189135 -flop,tsmc28,16,5000,0.04889200000000002,30.24,157.29,13.320625400000006 -priorityencoder,tsmc28,16,22068,0.046203482508609756,25.2,94.625,4.569524420101505 -priorityencoder,sky90,128,3201,0.31181037425804436,1541.54003,454.954,94.47854340018745 -add,tsmc28,128,8111,0.13344036012822094,1052.226011,5250.0,424.2069048476144 -mux4,tsmc28,32,16078,0.06632979064560268,93.239999,437.954,28.588139768254756 -mux4,sky90,32,4031,0.26555840014884646,686.980013,310.401,187.21867210493676 -add,sky90,8,3726,0.2784303263553409,282.240005,170.25,148.68179427375202 -shiftleft,tsmc28,128,10000,0.144407,2548.727987,12900.0,1076.987406 -mux2,tsmc28,32,19335,0.05585567933798811,32.130001,171.146,10.690777025290924 -mux2,sky90,16,4606,0.21640211984368213,114.660002,40.216,42.56629697325227 -mux8,sky90,128,2851,0.360490121360926,4595.220086,1780.0,991.3478337425465 -mux4,tsmc28,32,5000,0.14417800000000003,54.431999,184.849,11.534240000000002 -flop,sky90,64,13279,0.11434387551773477,1066.23999,520.0,2350.9672525823858 -shiftleft,sky90,8,1000,0.612417,130.340003,12.308,16.06369791 -add,tsmc28,16,5000,0.198828,47.754,237.822,17.516746800000004 -flop,sky90,16,1000,0.114344,266.5599975,129.629,44.2682796 -mux4,tsmc28,64,14785,0.06854111768684477,300.13199,1350.0,71.00859792357119 -comparator,sky90,64,3313,0.3259782315122246,1324.960026,375.296,186.13357019348024 -flop,tsmc28,128,40000,0.048892000000000005,241.919998,1260.0,852.0164380000001 
-mux2,tsmc28,16,19821,0.05247054129458655,15.372,84.373,5.241807075329197 -flop,sky90,128,10495,0.1143444683182468,2132.4799805,1035.0,3715.909359172226 -mux4,tsmc28,8,19610,0.05756739061703213,22.302,111.045,7.207437305252423 -mux8,sky90,16,3628,0.2861429581036384,590.940012,200.069,127.61975931422273 -mux2,sky90,16,4700,0.21094695744680853,117.600002,46.727,43.53945201702128 -shiftleft,sky90,16,3220,0.3103190062111801,539.000007,340.34,149.26344198757764 -mux4,tsmc28,8,18870,0.057567170641229466,22.302,111.045,6.936844062268151 -inv,sky90,1,100000,0.068855,3.92,5.959,30.70933 -priorityencoder,sky90,16,5408,0.18331724260355028,124.460002,60.685,17.45180149585799 -comparator,sky90,64,2500,0.399258,1413.160027,227.356,158.10616800000003 -mux8,sky90,8,3577,0.2789168803466592,287.140006,116.648,60.83177160360637 -flop,sky90,8,8396,0.11434433539780849,133.279999,64.8145,185.8552827555979 -mux4,tsmc28,128,13623,0.07585827049842178,298.619997,1420.0,97.40201931997356 -mux2,tsmc28,16,19533,0.052470412891004965,15.372,84.373,5.16833566976399 -mux4,sky90,128,1000,0.921621,1651.300032,109.473,381.551094 -mult,sky90,128,535,1.912680878504673,298339.444095,121000.0,294262.12779618695 -mux8,sky90,1,6174,0.16674654972465178,78.400001,61.522,14.023384831843215 -shiftleft,tsmc28,8,10,0.12195599999999729,15.624,50.976,0.005707540799999872 -shiftleft,sky90,32,2904,0.3714566170798898,1854.160032,1240.0,495.523127184573 -shiftleft,tsmc28,64,10000,0.12092800000000001,1066.337997,5680.0,359.51894400000003 -comparator,tsmc28,32,10000,0.099953,104.957999,399.61,25.1981513 -add,sky90,8,3133,0.3166568917969997,189.140003,92.176,82.01413497542292 -add,sky90,32,1767,0.5628299564233163,930.020018,188.601,298.862706860781 -add,tsmc28,8,10000,0.098662,25.704,135.998,9.866200000000001 -csa,tsmc28,16,5000,0.067577,34.271999,114.459,4.8723016999999995 -flop,sky90,16,14974,0.11434442286630159,266.5599975,129.629,662.8546193559504 
-flop,sky90,32,7708,0.11434433990659056,533.119995,259.258,682.3498483925791 -add,sky90,64,2406,0.4551495976724855,3146.780061,1370.0,1377.7378321546137 -priorityencoder,sky90,8,7363,0.13563720616596495,49.980001,33.776,9.264021181135407 -priorityencoder,sky90,16,5638,0.17736086094359702,130.340003,76.093,20.254610319758783 -shiftleft,sky90,64,1500,0.6639466666666667,2139.340042,239.334,359.19514666666663 -flop,sky90,16,7872,0.11434452032520326,266.5599975,129.629,348.4649256910569 -mux4,tsmc28,128,13356,0.07517871638215035,309.707998,1520.0,95.92804210362384 -shiftleft,tsmc28,8,15791,0.06615921170286872,48.762,356.201,17.28078609678931 -mux8,tsmc28,8,1000,0.11438300000000001,30.114,111.042,0.58564096 -shiftleft,tsmc28,128,20000,0.142383,2569.517985,13100.0,2099.579718 -flop,sky90,8,10,0.1143419999999935,133.279999,64.8145,0.22163481569998741 -mux4,tsmc28,32,10,0.12346599999999341,54.431999,199.177,0.01671729639999911 -csa,sky90,64,5000,0.190168,1505.280029,935.708,696.01488 -priorityencoder,tsmc28,32,17125,0.059472160583941606,60.354,212.071,9.25981540291971 -mux2,tsmc28,16,10,0.08560900000000515,14.49,74.123,0.00415203650000025 -mux2d,sky90,1,13482,0.07565897136923305,19.6,18.562,6.158640269455571 -csa,sky90,16,5740,0.16671402787456446,580.160011,413.956,285.58112974912893 -shiftleft,sky90,64,10,1.4229579999999942,2117.780041,153.751,5.165337539999978 -add,sky90,8,4024,0.2823789463220676,270.480005,164.225,155.0260415308151 -mux8,tsmc28,64,10304,0.09699368944099379,286.397999,1400.0,80.60175592546584 -priorityencoder,sky90,128,3757,0.2912058163428267,1743.420033,669.593,153.46546521266964 -csa,tsmc28,32,25357,0.04036684189770083,201.599998,1900.0,87.23274534093149 -mux8,sky90,16,3768,0.2855277813163482,599.760012,190.243,130.77172384288747 -priorityencoder,sky90,32,3119,0.3158135819172812,263.620005,40.919,17.401328363642193 -mult,sky90,64,593,1.6862836408094435,74329.081072,26700.0,67508.67927616525 -csa,tsmc28,16,25000,0.040492,104.832,964.99,43.974312 
-priorityencoder,tsmc28,16,10000,0.09986,13.608,39.317,2.636304 -shiftleft,sky90,64,1000,0.980714,2118.760041,162.005,320.693478 -mux8,sky90,32,3218,0.3206610198881293,1057.420021,332.873,226.38668004101925 -comparator,tsmc28,64,11075,0.09302145372460496,282.366,1190.0,66.78940377426636 -mux4,tsmc28,8,19240,0.05756705197505198,22.302,111.045,7.0692339825363835 -add,sky90,8,3073,0.3237329040026033,222.460004,103.281,98.0910699127888 -mux2,sky90,16,5504,0.20207404651162789,119.560002,32.354,43.2236385488372 -flop,tsmc28,32,20700,0.04889217874396135,60.4799995,314.5785,110.23475080507247 -add,tsmc28,64,8370,0.11969531302270012,439.488003,2130.0,164.70075071923534 -mult,sky90,64,500,1.999933,56949.760296,9600.0,26869.099855 -mux2,tsmc28,16,17618,0.054704131683505505,14.994,80.805,4.792081935475082 -shiftleft,tsmc28,128,6461,0.15476280266212658,1215.647996,5280.0,368.6449959411854 -add,sky90,16,2457,0.404978407000407,471.380009,117.088,162.80131961416362 -csa,sky90,32,5617,0.17789797739006588,1160.320023,826.069,595.4245303245506 -add,sky90,16,2808,0.3558933561253561,617.400012,368.293,301.4416726381766 -priorityencoder,sky90,32,4945,0.20940146916076843,376.320007,230.673,46.5708867413549 -priorityencoder,tsmc28,128,30000,0.08661733333333332,321.048001,1040.0,110.61033466666667 -mux2d,sky90,1,14275,0.07565853940455342,19.6,18.562,6.521766096672504 -inv,tsmc28,1,50000,0.014172,0.252,1.005,0.07312752000000002 -mux2,sky90,128,10,0.9859239999999971,882.000017,273.775,4.042288399999989 -shiftleft,sky90,128,1755,0.5695325698005698,7463.680134,3390.0,1931.8544767635328 -mux2,tsmc28,16,15000,0.06434766666666666,14.616,76.036,4.6909449 -priorityencoder,tsmc28,128,10000,0.09996100000000001,204.498,547.044,26.589626000000006 -mux4,sky90,8,4653,0.2148671085321298,155.820002,81.003,41.340431681581784 -mux4,sky90,64,3773,0.26744608136761194,937.860018,363.202,298.2023807248873 -priorityencoder,sky90,16,3000,0.3257383333333333,95.060002,13.926,10.814512666666666 
-priorityencoder,sky90,64,4012,0.24924324327018943,786.940013,356.488,71.53281081854438 -mux4,sky90,1,7216,0.1368659312638581,26.460001,19.975,5.392517691796009 -add,sky90,128,2081,0.5470482027871215,6442.520124,2500.0,2626.3784215809706 -comparator,sky90,16,3000,0.3329993333333333,314.580006,67.26,45.38780913333333 -flop,sky90,32,9445,0.11434412493382742,533.119995,259.258,836.1414135786131 -add,tsmc28,64,9073,0.12137112774165106,444.276004,2240.0,188.00387687181745 -shiftleft,tsmc28,64,20000,0.122236,925.091998,4690.0,633.6714239999999 -comparator,sky90,32,3596,0.27763876307007784,697.760013,316.793,109.38967264961067 -add,sky90,32,2450,0.4097372653061225,1346.520026,632.669,538.804503877551 -add,sky90,64,2050,0.4875578780487805,2684.220052,882.766,867.8530229268293 -shiftleft,tsmc28,64,8931,0.12429654428395477,1114.721996,5810.0,348.52751017220925 -comparator,sky90,128,2682,0.37267507755406415,2836.120055,772.571,463.6077964772558 -priorityencoder,sky90,128,3000,0.33288433333333334,1515.080029,375.482,90.54453866666667 -comparator,tsmc28,32,12879,0.08809977995185962,147.924,654.69,40.52589877785543 -mux4,sky90,128,3295,0.30947313657056147,2212.840043,775.234,623.897843326252 -mult,sky90,128,556,1.9619181510791366,297281.04397,133000.0,333545.70486496395 -mux8,sky90,8,3431,0.29033921568055965,275.380005,59.609,49.24153097942292 -mux4,sky90,8,5035,0.22075073187686195,203.840004,102.281,51.92057213743794 -priorityencoder,tsmc28,8,25000,0.039097,4.662,14.604,0.9383279999999999 -mult,tsmc28,32,2852,0.3506291360448808,4220.49602,28600.0,4513.64886830575 -priorityencoder,sky90,16,10,0.660728000000006,85.260002,5.247,0.04572237760000041 -shiftleft,sky90,128,1946,0.521699614594039,10251.780151,7790.0,3348.2681264645425 -mux4,sky90,16,4448,0.22462614388489208,390.040001,401.649,103.10340004316545 -mux4,tsmc28,8,17153,0.058123839853086924,21.042,95.86,6.399434767824871 -comparator,tsmc28,32,12136,0.08238147264337507,146.16,623.674,35.50641470929466 
-csa,tsmc28,32,5000,0.067577,68.543999,229.117,9.737845700000001 -priorityencoder,sky90,8,8474,0.1252120245456691,64.680001,49.589,15.526291043662969 -mux2,tsmc28,32,10,0.17166100000000029,28.728001,146.078,0.016805611900000024 -comparator,sky90,128,3249,0.3822960113881194,2764.580054,934.605,431.6121968571868 -mux2,tsmc28,8,31412,0.03374496752833312,16.758,114.743,5.78726193110913 -mux4,sky90,8,4938,0.2105371381125962,164.640002,96.679,46.0865795328473 -shiftleft,tsmc28,128,7585,0.14153315622940013,2560.949989,13000.0,799.2377332274225 -mux4,tsmc28,16,10,0.092804000000001,28.224,102.974,0.0064034760000000685 -flop,tsmc28,32,40000,0.048892000000000005,60.4799995,314.581,213.02244399999998 -mult,tsmc28,8,4987,0.20050335552436335,410.634,2900.0,286.920301755364 -flop,sky90,128,8396,0.11434433539780849,2132.4799805,1035.0,2972.5525151691286 -mux4,sky90,16,4092,0.24427427663734116,245.000004,71.922,75.0410577829912 -add,sky90,8,3875,0.26842451612903223,273.420005,147.185,128.3069187096774 -priorityencoder,sky90,128,3004,0.33264848069241015,1519.980029,408.654,97.7986533235686 -shiftleft,tsmc28,64,7608,0.13137658885383804,516.221996,2380.0,161.59320429022077 -csa,sky90,128,1000,0.264181,1881.599976,220.98,178.322175 -csa,tsmc28,32,23865,0.04077636748376283,197.567999,1890.0,83.30611876932745 -flop,sky90,16,8200,0.11434421951219512,266.5599975,129.629,362.9857248414634 -priorityencoder,sky90,64,2999,0.3322404814938313,603.680011,115.541,40.533338742247416 -mux4,sky90,16,1000,0.565966,219.520004,14.317,28.128510199999997 -comparator,tsmc28,16,15000,0.07002866666666667,81.522,384.522,21.358743333333333 -shiftleft,tsmc28,64,8435,0.12291364552459988,1006.488,5100.0,290.69077166567877 -csa,tsmc28,32,25854,0.040894734431809396,201.599998,1900.0,90.74541570418506 -comparator,tsmc28,32,12631,0.08578529530520149,146.286,660.995,38.346027001425064 -mux8,sky90,1,6300,0.16910415873015872,77.420002,50.656,13.173213965079363 
-priorityencoder,tsmc28,8,20000,0.043787000000000006,4.788,14.911,0.8345802200000001 -mux8,tsmc28,16,13256,0.08118853771876887,86.814,421.512,25.33082376825589 -mux8,tsmc28,64,11377,0.0900516335589347,507.653995,2410.0,107.79180537004483 -flop,tsmc28,16,21528,0.04889213340765514,30.24,157.29,57.32602642047565 -mux4,sky90,32,3802,0.2622634634402946,465.500009,150.568,139.26189908679646 -add,tsmc28,64,1000,0.999562,189.126001,920.014,70.7689896 -mux2,tsmc28,16,19440,0.052470329218107,15.372,84.373,5.1473392962962965 -mux2,tsmc28,1,47818,0.020583627044209293,2.016,13.728,0.5783999199422811 -add,tsmc28,8,14631,0.0716170281593876,39.06,231.044,16.149639849941902 -mux8,tsmc28,64,11440,0.08783258741258741,503.369994,2330.0,102.14929916083916 -add,tsmc28,16,11294,0.08851858898530193,85.428,437.897,31.512617678767487 -mux4,tsmc28,16,5000,0.10381800000000001,28.224,95.979,4.1631018 -flop,tsmc28,32,10000,0.048892000000000005,60.4799995,314.5805,53.2580556 -flop,tsmc28,8,19458,0.048891743344639735,15.12,78.634,25.92240232132799 -mux2,sky90,32,4979,0.1998975428800964,374.360008,259.372,136.13022670134566 -mux4,sky90,128,10,1.923141000000001,1662.080032,111.431,8.442588990000004 -mux8,tsmc28,32,12914,0.08449634148985596,175.517999,846.989,51.542768308812136 -shiftleft,sky90,8,4056,0.2462123234714004,260.680005,147.281,66.23111501380671 -add,sky90,64,2362,0.49665802540220155,2788.100054,911.951,1021.6255582523286 -flop,sky90,32,8528,0.11434378799249531,533.119995,259.258,754.9548602204503 -mux8,sky90,32,3345,0.3275656621823617,1058.400021,328.34,234.8645797847534 -shiftleft,sky90,32,2500,0.39945200000000003,1400.420023,738.137,368.29474400000004 -csa,tsmc28,128,20000,0.049745000000000004,467.586006,3330.0,177.58965000000003 -shiftleft,tsmc28,64,8269,0.12088260744951022,967.427998,4980.0,272.83204501354453 -csa,tsmc28,8,30000,0.040226333333333336,50.4,473.168,25.66440066666667 -csa,tsmc28,8,26848,0.04022672228843861,52.416,489.192,23.653312705601905 
-shiftleft,sky90,64,2158,0.4633380296570899,3501.540059,2250.0,1004.516848296571 -mux8,tsmc28,8,14256,0.07292290347923681,96.641998,446.639,20.63718168462402 -mult,sky90,8,1000,0.999357,1330.840019,338.111,571.632204 -mux2,sky90,64,4391,0.24216455613755408,487.060007,132.876,169.99951840856295 -priorityencoder,tsmc28,32,15000,0.06654566666666667,44.226,135.239,6.541439033333334 -mux2,sky90,128,3840,0.26133266666666666,1384.740006,832.661,489.214752 -priorityencoder,sky90,16,6098,0.16939519285011478,166.600002,113.104,29.813553941620203 -add,tsmc28,16,10000,0.099541,61.236,299.915,23.591217 -shiftleft,sky90,16,3086,0.32394706999351913,634.060009,424.832,185.62167110628644 -flop,sky90,32,7544,0.11434467338282078,533.119995,259.258,667.8300648923647 -mux8,tsmc28,64,10518,0.09489010933637573,292.319999,1420.0,80.37192260791025 -mux2,tsmc28,128,17948,0.05788451437486071,210.924,1240.0,63.15200518297305 -mux8,sky90,64,2759,0.36215216310257337,1951.180036,531.802,383.5191407256253 -comparator,tsmc28,8,17750,0.05673002816901408,36.918,186.638,9.814294873239435 -mux4,tsmc28,1,24680,0.04009963857374392,2.898,17.89,0.8721671389789303 -flop,sky90,16,8046,0.11434435918468804,266.5599975,129.629,356.18267886030327 -csa,tsmc28,64,10000,0.067577,137.087997,458.434,38.99192899999999 -priorityencoder,sky90,8,7057,0.14136427334561427,47.040001,23.417,10.178227680884227 -add,tsmc28,16,12695,0.08854316975187082,116.55,703.496,50.38106358881449 -mux2,tsmc28,8,29634,0.03374502260916515,16.758,114.564,5.436323142336505 -comparator,tsmc28,8,35000,0.05746442857142857,38.682,206.92,21.721554 -add,tsmc28,64,8705,0.12031950775416428,440.622004,2220.0,172.89913264273403 -add,tsmc28,8,15194,0.0705534534684744,41.706,250.932,17.504311805528495 -comparator,sky90,128,2500,0.39962600000000004,2625.420051,495.907,360.86227800000006 -mult,sky90,64,670,1.561193313432836,92421.841521,53500.0,106089.33042101492 -flop,tsmc28,8,15000,0.04889166666666667,15.12,78.6345,19.969801250000003 
-mux4d,tsmc28,1,35949,0.029486185457175445,6.426,53.388,2.0728788376394336 -mux8,sky90,128,2609,0.38315661632809506,4166.960064,1500.0,860.9529168892296 -mux8d,sky90,1,7250,0.13412503448275861,85.260001,40.078,14.659866268965517 -csa,sky90,16,5984,0.16522529946524064,588.000011,322.135,321.19798216042784 -mult,sky90,16,1500,1.0034256666666665,8010.520148,5680.0,11227.329784333331 -mux2,sky90,32,5346,0.2190407426112982,233.240005,55.502,88.49246001496446 -mux2,sky90,32,5283,0.2190403903085368,233.240005,55.502,87.17807534279765 -shiftleft,tsmc28,16,12046,0.08302410874979246,122.093999,582.348,32.047305977419896 -mux4,sky90,16,4363,0.22812709168003667,359.659999,419.781,100.14779324753611 -comparator,tsmc28,8,16706,0.05964573338920149,30.114,136.439,7.926917967424878 -mux4,sky90,8,4748,0.21265799578770006,145.040003,62.861,39.53312141693344 -flop,tsmc28,128,20700,0.04889217874396135,241.919998,1260.0,440.8852218236715 -mux2d,tsmc28,1,30000,0.029228333333333332,0.882,4.502,0.23178068333333332 -priorityencoder,tsmc28,8,10000,0.05781100000000001,4.284,14.558,0.49833082000000006 -shiftleft,tsmc28,8,1000,0.12400900000000004,15.624,51.263,0.6225251800000002 -mux4,tsmc28,32,13954,0.07113603898523721,68.04,282.367,20.700587344704033 -flop,tsmc28,128,5000,0.04889200000000002,241.919998,1260.0,106.48922060000005 -priorityencoder,sky90,16,5062,0.19599137534571315,111.720002,44.849,14.248572987633345 -mux2,sky90,64,4225,0.24297539053254436,511.560009,130.238,162.5505362662722 -shiftleft,tsmc28,128,5000,0.199997,672.083996,2400.0,219.19671200000002 -mux2,sky90,32,5385,0.21904002135561745,233.240005,55.502,89.14928869173632 -mult,sky90,16,10,5.005561999999998,3869.040009,641.517,84.49388655999995 -mux2,sky90,16,4982,0.20207360136491367,119.560002,32.354,39.22248602492974 -mux4,tsmc28,8,19297,0.05756652666217547,22.302,111.045,7.092196084780017 -priorityencoder,sky90,32,5139,0.2065433872348706,395.920008,214.865,52.87510713212687 
-csa,tsmc28,32,24362,0.0409855330432641,209.664,1920.0,86.84834451867664 -mux2,sky90,8,5060,0.1975614584980237,63.700001,30.692,22.067614914229246 -mux8,sky90,64,1000,0.839879,1606.220031,112.505,229.0350033 -flop,tsmc28,32,20000,0.048892000000000005,60.4799995,314.5785,106.51366660000002 -csa,tsmc28,8,5000,0.067577,17.136,57.229,2.4395297 -shiftleft,sky90,16,3422,0.30792076797194623,625.240012,393.788,169.0485016165985 -mux8,tsmc28,16,12756,0.0794124810285356,97.776,467.559,26.523768663530888 -csa,tsmc28,128,10000,0.067577,274.175995,917.068,78.11901199999998 -priorityencoder,tsmc28,16,30000,0.046221333333333336,26.334,96.855,6.512585866666667 -inv,sky90,1,150000,0.06885566666666666,3.92,5.959,46.064440999999995 -shiftleft,sky90,8,4497,0.23008146920169,262.640004,204.113,76.84721071336445 -mux4,sky90,1,7373,0.13543700135630002,32.340001,22.173,6.785393767950631 -mux4,sky90,16,4541,0.26359681149526537,358.680007,100.453,94.89485213829553 -mux2,tsmc28,32,10000,0.092725,28.854001,147.552,8.595607500000002 -flop,tsmc28,128,1000,0.04889200000000005,241.919998,1260.0,21.307133600000018 -mux8,tsmc28,32,11939,0.08593910880308234,326.843999,1610.0,65.82935734316108 -add,tsmc28,8,13787,0.07226709545223761,33.012,176.194,12.328766484151734 -comparator,tsmc28,32,11641,0.08862527291469806,123.48,513.166,30.664344428485524 -shiftleft,tsmc28,64,15000,0.12178166666666666,990.863998,5040.0,510.02162000000004 -mux8,sky90,64,3119,0.3368585819172812,2214.800043,772.006,488.78180236197505 -mux8,sky90,32,2953,0.3383956725364037,1262.24001,682.649,294.7426307792076 -csa,tsmc28,32,1000,0.067577,68.543999,229.117,1.9462175999999998 -add,tsmc28,128,7360,0.1359025652173913,907.578008,4370.0,347.6387618260869 -mux4,sky90,32,3955,0.26463450063211125,696.780013,305.234,181.00999843236409 -add,tsmc28,16,11972,0.0860352325425994,99.036001,529.716,39.318101271967926 -flop,tsmc28,128,21528,0.04889213340765514,241.919998,1260.0,458.55931923039753 
-mux4,tsmc28,16,15755,0.0628519136781974,34.776,149.138,10.464843627419867 -mux2,sky90,1,13011,0.08300904319422028,13.72,12.3,4.216859394266391 -shiftleft,tsmc28,32,9000,0.1110851111111111,249.857999,1130.0,69.31710933333333 -add,sky90,128,1963,0.5368793504839532,6416.060124,2500.0,2342.941485511972 -shiftleft,tsmc28,8,15000,0.06663266666666666,48.258,347.175,16.32500333333333 -mux2,sky90,1,12047,0.08300821781356355,13.72,12.3,3.909687059018843 -priorityencoder,sky90,8,7355,0.13567993065941536,49.980001,33.776,9.307643243235892 -priorityencoder,tsmc28,128,11906,0.08704826490844952,288.414,908.014,39.606960533344534 -comparator,tsmc28,16,15422,0.07112543288808196,65.898,305.232,18.058747410284013 -add,tsmc28,32,9618,0.10391371969224371,201.600002,1010.0,79.1822544054897 -mux4,tsmc28,64,15655,0.07022035547748323,145.151999,710.274,52.03328340881507 -priorityencoder,sky90,8,8131,0.12799310257040955,56.840001,37.422,12.159344744188907 -csa,tsmc28,8,10000,0.067577,17.136,57.229,4.8723016999999995 -mux8,tsmc28,64,10340,0.09667479883945841,338.435996,1430.0,78.88663585299807 -add,sky90,16,2307,0.43290137234503684,443.940009,86.522,149.3509734590377 -mux8,sky90,128,10,1.955451999999994,3207.540062,224.085,11.361176119999966 -priorityencoder,sky90,32,3526,0.28135048723766304,247.940005,38.796,17.58440545235394 -mux2,tsmc28,16,5000,0.08561,14.49,74.123,2.0803230000000004 -shiftleft,tsmc28,8,16398,0.06592904671301378,61.488,451.858,21.888443508720574 -add,tsmc28,64,5000,0.19029300000000002,231.210001,1080.0,79.54247400000001 -mult,sky90,128,1000,1.932611,290264.243977,112000.0,520929.49721700005 -mult,sky90,64,250,3.997668,49702.660357,5450.0,19720.496243999998 -mux4,tsmc28,16,1000,0.092804,28.224,102.974,0.6403476 -flop,sky90,32,9095,0.11434452226498075,533.119995,259.258,805.2141257899945 -mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273311.87793918326 -flop,sky90,32,10495,0.1143444683182468,533.119995,259.258,929.1059773199144 
-shiftleft,sky90,64,2338,0.44644599657827205,4425.68006,3620.0,1337.552205748503 -priorityencoder,sky90,16,4694,0.21080192074989348,104.860002,32.047,11.425464104644227 -shiftleft,tsmc28,128,6602,0.1514602517418964,1247.021993,5430.0,390.01014823538327 -priorityencoder,tsmc28,16,21202,0.04705136175832468,21.294,73.912,3.815865438600132 -flop,tsmc28,64,21942,0.04889169692826543,120.959999,630.0,233.6778654686446 -comparator,tsmc28,128,10,0.6097180000000009,256.787999,853.776,0.3451003880000005 -mux8,tsmc28,32,11208,0.08916198429693076,167.201999,792.502,42.70859047822983 -flop,sky90,16,8528,0.11434378799249531,266.5599975,129.629,377.448844163227 -csa,tsmc28,32,10,0.06758100000000411,68.543999,229.117,0.01946332800000118 -mux4,sky90,8,10,0.3993300000000062,116.620002,9.077,0.09559960200000148 -priorityencoder,tsmc28,32,15782,0.06330432530731213,48.51,149.054,7.343301735648207 -mux4,sky90,16,5000,0.255844,359.660007,120.182,94.150592 -mux2,sky90,1,12529,0.08300882959533881,13.72,12.3,4.059131767212068 -mux4,sky90,32,3574,0.27880854504756575,450.800009,134.823,138.01022979854505 -csa,sky90,128,5000,0.190168,3010.560059,1880.0,1393.551104 -flop,sky90,32,8200,0.11434421951219512,533.119995,259.258,725.8571054634145 -mux4,tsmc28,16,16097,0.06183737702677517,34.272,145.085,10.493802881443747 -mux8d,tsmc28,1,22375,0.0455857374301676,20.664,171.151,6.742130565921787 -priorityencoder,tsmc28,128,10,0.9983220000000017,114.533999,291.024,0.15943202340000026 -mult,sky90,64,619,1.6154428852988691,82141.641305,36600.0,84038.56977901777 -shiftleft,sky90,32,2794,0.37189880672870435,1813.980028,1310.0,537.7656745297064 -priorityencoder,sky90,32,4680,0.21348121367521367,366.520007,200.482,45.535542876923074 -mux8,tsmc28,16,10,0.1332400000000007,55.314,209.828,0.013017548000000068 -shiftleft,tsmc28,32,10000,0.102217,374.85,1790.0,106.71454800000001 -shiftleft,sky90,8,4233,0.23563307394283015,247.940005,186.768,67.39105914764941 
-mux2,sky90,64,10,0.9630970000000048,444.920009,77.207,1.618002960000008 -mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18545.980779602512 -shiftleft,tsmc28,16,13275,0.08217156685499058,178.037998,941.714,50.535513615819205 -mult,sky90,16,10000,1.01953,8018.360145,5700.0,74794.75986 -mux2,tsmc28,16,18001,0.05470446930726071,14.994,80.805,4.896050002999833 -mux8,sky90,32,3408,0.3320732300469484,899.640018,273.363,196.58735218779347 -mux8,tsmc28,128,10678,0.09732449634763064,910.727989,4180.0,190.5613638486608 -shiftleft,tsmc28,128,6742,0.14831493948383268,1348.955989,6120.0,404.15821009344404 -mult,sky90,32,748,1.357571395721925,22731.100368,11200.0,21406.185767743318 -mux2,tsmc28,8,10,0.08170300000000452,7.56,38.026,0.0019690423000001092 -mux2,sky90,16,4888,0.20438765139116202,115.640002,27.351,37.9752256284779 -priorityencoder,sky90,128,3184,0.313652351758794,1590.54003,480.507,110.71928017085428 -csa,sky90,32,5984,0.16675729946524065,1177.960023,649.039,647.8521084224599 -comparator,tsmc28,32,10,0.21334099999999978,64.26,218.094,0.031830477199999964 -mux2,sky90,32,5487,0.21903995206852558,233.240005,55.502,90.6825401563696 -mult,sky90,32,1500,1.3061846666666668,26329.660471,16700.0,51881.65496000001 -priorityencoder,sky90,8,7835,0.1262664186343331,49.980001,32.378,10.720018942054882 -comparator,sky90,32,4602,0.27431682746631897,748.720015,347.767,143.1933839374185 -flop,tsmc28,8,22356,0.04889172105922347,15.12,78.635,29.76772436690821 -mux4,tsmc28,32,15471,0.0654870628918622,73.961999,360.628,24.688622710232046 -mux4,sky90,8,4750,0.21265831578947367,145.040003,62.861,39.53318090526316 -mux8,tsmc28,8,14804,0.07403031099702784,90.342,420.482,21.542820500135104 -mux2,sky90,64,3000,0.31648333333333334,448.840009,147.78,159.5076 -csa,tsmc28,64,24860,0.04049226146419952,419.327999,3860.0,174.60263143362835 -mux4,sky90,1,8471,0.12749281702278362,28.420001,25.098,6.999355654550821 
-flop,sky90,16,7544,0.11434467338282078,266.5599975,129.629,333.9436186145281 -add,sky90,64,2273,0.46160520633523977,2966.460057,1170.0,1136.0104127910251 -mux4,sky90,32,3000,0.3310723333333333,433.160008,114.713,123.15890799999998 -priorityencoder,sky90,128,2644,0.37802782602118,1372.000027,260.085,72.80815929167926 -flop,sky90,64,8746,0.11434398307797851,1066.23999,520.0,1548.446218841985 -csa,sky90,128,6106,0.16536133770062234,5269.460103,3330.0,2494.6411405515887 -csa,tsmc28,8,1000,0.067577,17.136,57.229,0.4872301699999999 -flop,tsmc28,16,20700,0.04889217874396135,30.24,157.289,55.11859770700484 -csa,sky90,64,6472,0.16536174289245983,3010.560059,1720.0,1357.6199091470953 -shiftleft,sky90,16,3556,0.297124848143982,608.58001,422.656,163.12154163104609 -flop,sky90,8,8364,0.11434401912960306,133.279999,64.8145,185.15155297560972 -priorityencoder,tsmc28,8,32533,0.03304801985676083,11.592,49.654,2.3034469840162295 -mux8,sky90,1,5922,0.16975587098953057,72.520001,56.57,13.274909111381291 -shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1144.633567988198 -flop,tsmc28,128,5000,0.04889200000000002,241.919998,1260.0,106.48922060000005 -mux4,tsmc28,8,18500,0.055939054054054055,24.066,117.217,7.4790515270270275 -csa,tsmc28,32,25000,0.040492,209.664,1930.0,87.82714800000001 -mult,sky90,8,1257,0.7949849482895783,1967.840037,1110.0,1339.5496378679395 -shiftleft,tsmc28,128,5000,0.199997,672.083996,2400.0,219.19671200000002 -mux4d,sky90,1,40000,0.103077,51.940001,49.18,56.176965 -mux8,sky90,64,3059,0.3274882170644001,2283.400044,765.158,470.60056792154296 -priorityencoder,tsmc28,64,5000,0.199836,68.292,191.082,9.532177199999998 -csa,tsmc28,16,25357,0.04036684189770083,100.799999,950.751,43.757656617107706 -mux2,sky90,64,4140,0.24845789371980675,479.220009,102.601,154.78926778743963 -flop,sky90,8,8746,0.11434398307797851,133.279999,64.8145,193.6358181434027 -shiftleft,tsmc28,32,20000,0.10250200000000001,341.208,1680.0,201.826438 
-csa,tsmc28,128,10000,0.067577,274.175995,917.068,78.11901199999998 -mux2,sky90,16,4630,0.2132927213822894,115.640002,41.725,42.48791009935205 -shiftleft,tsmc28,8,5000,0.11473000000000001,15.624,51.647,2.9829800000000004 -csa,sky90,8,6472,0.16536174289245983,375.340007,216.172,170.48795692212607 -csa,tsmc28,128,26848,0.04022672228843861,838.655994,7830.0,376.4416671752085 -csa,tsmc28,128,30000,0.040226333333333336,806.399994,7570.0,408.33750966666673 -priorityencoder,sky90,32,4967,0.20940176988121603,376.320007,230.673,46.67565450652305 -shiftleft,sky90,32,2474,0.4041457186742118,1482.740026,885.25,385.555015615198 -mux4,tsmc28,8,17760,0.05621230630630631,23.436,112.334,7.12209920900901 -shiftleft,tsmc28,128,1000,0.524341,621.683997,2020.0,121.64711199999998 -comparator,tsmc28,32,12384,0.08232135400516796,163.044,743.896,40.25514210852713 -priorityencoder,sky90,16,5523,0.1807980175629187,119.560002,54.0,16.307981184175265 -mux4,tsmc28,32,14258,0.06951206396409033,70.056,308.062,21.548739828868 -mult,sky90,32,825,1.305535212121212,23737.560395,13100.0,25042.776438909095 -add,sky90,128,1806,0.5536878560354375,5933.900115,2120.0,1891.9514040730899 -comparator,tsmc28,8,18098,0.05802072427892584,35.154,195.216,10.130418459100452 -comparator,sky90,32,1000,0.905658,495.88001,66.524,39.124425599999995 -add,sky90,64,2228,0.46351903411131057,2911.580057,1040.0,1016.0337227719929 -priorityencoder,tsmc28,64,13786,0.07244435673872045,137.088001,428.365,18.328422254896275 -mult,sky90,16,1056,1.003036696969697,8028.160147,5580.0,7355.26809887879 -comparator,sky90,64,10,1.1199110000000019,1008.42002,127.626,0.9306460410000015 -mux4,tsmc28,32,1000,0.14417800000000003,54.431999,184.849,2.3082897800000004 -mux8,sky90,16,3280,0.3052030487804878,644.840013,287.966,131.84771707317074 -mux8,tsmc28,16,12006,0.08308368748958854,88.83,424.362,23.9281019970015 -add,tsmc28,16,1000,0.476658,32.886,116.238,7.459697700000001 
-flop,sky90,16,5000,0.11434400000000002,266.5599975,129.629,221.34139800000005 -mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50011.4036139272 -mult,tsmc28,16,4053,0.2587838166790032,1990.674001,14800.0,1757.1421152504315 -shiftleft,tsmc28,8,16094,0.06713995712687958,44.982,306.629,16.019593770473463 -shiftleft,sky90,8,5000,0.22681400000000002,277.340005,226.865,87.55020400000001 -flop,tsmc28,32,21528,0.04889213340765514,60.4799995,314.581,114.6520528409513 -shiftleft,tsmc28,128,10000,0.144407,2548.727987,12900.0,1076.987406 -mux8,tsmc28,1,15040,0.06648836170212766,6.93,29.341,1.8171269253191489 -flop,sky90,32,9270,0.11434386515641856,533.119995,259.258,820.7030921601942 -mult,tsmc28,64,2634,0.4097567213363706,19951.974017,141000.0,24597.695981822326 -shiftleft,tsmc28,8,14272,0.07000526457399102,38.052,274.573,12.915971313901343 -mult,tsmc28,32,3216,0.32725927363184076,6856.037993,51400.0,7051.782828218905 -add,sky90,128,2000,0.549155,6414.100125,2390.0,2531.055395 -add,sky90,64,1000,0.996359,1638.560032,229.217,405.51811299999997 -priorityencoder,tsmc28,128,12373,0.08597314281095936,298.494001,967.377,43.1585176911016 -mux4,tsmc28,8,10000,0.08076,15.12,51.529,3.303084 -mux2,tsmc28,16,10000,0.08561,14.49,74.123,4.1520850000000005 -csa,tsmc28,16,25854,0.040894734431809396,100.799999,947.479,45.515839422603854 -add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,939.1435764188874 -comparator,tsmc28,16,10,0.16839600000000132,29.736,97.848,0.011450928000000091 -mux2,sky90,1,11083,0.08482327754218172,11.76,9.281,3.5032013624921055 -priorityencoder,sky90,32,1000,0.879197,196.980004,12.728,10.48002824 -mux4,sky90,64,3419,0.2917661822170225,1055.46002,266.494,272.80138037291607 -mux4,sky90,8,4845,0.2105163488132095,160.720002,75.759,41.99801158823529 -mux2,sky90,1,11806,0.08300869354565475,13.72,12.3,3.8183999031001186 -mux4d,tsmc28,1,10000,0.043929,2.268,8.156,0.23194512 -csa,tsmc28,16,23865,0.04077636748376283,98.783999,946.879,41.75500030337314 
-mux2,sky90,16,3920,0.23789204081632653,115.640002,38.479,39.39492195918368 -priorityencoder,sky90,8,10000,0.127398,59.780001,40.39,15.682693800000004 -priorityencoder,sky90,64,4430,0.2508476343115124,804.580015,353.584,79.76954771106094 -priorityencoder,tsmc28,128,20000,0.087283,307.062,958.128,71.57206000000001 -mux2,tsmc28,8,5000,0.08170100000000001,7.56,38.026,0.9836800400000001 -flop,tsmc28,16,40000,0.048892000000000005,30.24,157.29,106.5161112 -mux8,tsmc28,16,10000,0.097831,59.094,224.879,13.490894900000002 -csa,sky90,8,6350,0.16398131496062993,392.000008,245.798,189.23443746456692 -priorityencoder,sky90,16,5715,0.17383212773403323,131.320003,73.59,20.66863998757655 -csa,tsmc28,16,1000,0.067577,34.271999,114.459,0.9737845699999998 -flop,sky90,16,8920,0.11434462331838566,266.5599975,129.629,394.83198431838565 -mux2d,sky90,1,30000,0.07565933333333333,19.6,18.562,13.701905266666666 -mux2,tsmc28,8,29041,0.033768075961571574,16.884,113.726,5.335356001928308 -flop,sky90,16,10,0.1143419999999935,266.5599975,129.629,0.4426750529999749 -mux2,sky90,64,3710,0.2662697789757412,455.700009,143.8,163.22337451212937 -flop,sky90,16,7708,0.11434433990659056,266.5599975,129.629,341.20351028126623 -csa,sky90,8,6228,0.16536218946692358,338.100007,209.02,160.40132378291585 -add,tsmc28,32,9810,0.1019177991845056,209.286002,1060.0,81.43232154841998 -mux2,sky90,64,4265,0.24438158851113717,479.220007,123.342,163.24690112543962 -mux2d,sky90,1,12689,0.07841941673890772,18.62,16.576,5.8892981970919696 -csa,sky90,64,6228,0.16536218946692358,2697.940053,1590.0,1263.863214095697 -mux8,sky90,16,10,0.7726059999999961,430.220008,28.957,0.5315529279999974 -flop,tsmc28,64,10000,0.048892000000000005,120.959999,630.0,106.51366660000002 -comparator,sky90,128,10,1.136650000000003,1997.240039,243.506,1.7868138000000047 -csa,sky90,64,6350,0.16398131496062993,3136.000061,1970.0,1508.3001350078741 -mux2,tsmc28,64,18546,0.05385698274560552,90.846,517.414,27.359347234767604 
-comparator,tsmc28,128,15000,0.10985066666666667,491.022003,2380.0,182.90136 -comparator,tsmc28,64,10223,0.09778464423359091,235.62,932.855,54.8571854150445 -shiftleft,tsmc28,128,7304,0.14135928148959476,2368.547985,11900.0,680.6449403723987 -mux4d,tsmc28,1,32558,0.03008041734750292,4.158,30.464,1.2543534033908719 -mux4,sky90,128,5000,0.309527,3004.680057,1720.0,1123.273483 -mux8,tsmc28,128,10250,0.0974409756097561,712.907993,3150.0,163.11619317073172 -mux2,tsmc28,32,18977,0.055856368077146015,32.130001,171.146,10.495411561695736 -flop,tsmc28,8,21114,0.048891939945060144,15.12,78.6345,28.12997764739035 -flop,sky90,128,8746,0.11434398307797851,2132.4799805,1035.0,3096.6065777262747 -priorityencoder,sky90,64,2823,0.35280208537017355,594.860012,101.901,38.77294918218207 -mux2,tsmc28,1,53922,0.01965830618300508,2.142,15.112,0.6369291203293646 -mux2,tsmc28,8,15000,0.06463066666666667,7.56,38.116,2.333167066666667 -flop,tsmc28,8,19044,0.048891976895610166,15.12,78.634,25.35293461921865 -inv,tsmc28,1,25000,0.014172,0.252,1.005,0.036592104 -csa,sky90,128,5862,0.16964724223814398,5091.100099,3510.0,2527.404614863869 -priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,10.26960087527008 -shiftleft,tsmc28,16,5000,0.16564500000000001,43.722,136.668,11.4791985 -add,sky90,16,3367,0.330827297000297,716.380014,469.58,382.105528035343 -mult,sky90,64,2500,1.551295,96693.661592,61800.0,429972.43515 -mux2,sky90,8,5940,0.18835116835016835,64.680001,21.541,21.60387900976431 -comparator,sky90,128,3148,0.3487390076238882,3087.000056,1340.0,521.364816397713 -priorityencoder,tsmc28,32,30000,0.06037733333333333,67.032,248.971,18.475464 -comparator,sky90,8,4364,0.22746457103574702,187.180004,82.043,36.4170778228231 -mux8,sky90,8,10,0.5354459999999932,234.220005,15.482,0.17610818939999778 -add,sky90,64,2000,0.499819,2649.920052,942.503,994.63981 -add,sky90,8,3502,0.2823721136493432,243.040004,150.588,123.96135789206168 
-priorityencoder,tsmc28,32,5000,0.187693,26.334,75.077,3.6750289400000002 -mux4d,tsmc28,1,20000,0.043929,2.268,8.152,0.4634509500000001 -flop,sky90,32,7872,0.11434452032520326,533.119995,259.258,696.8726791219513 -csa,sky90,128,10,0.2641830000000027,1881.599976,220.98,1.7832352500000184 -add,sky90,16,3242,0.33159157310302284,708.540014,495.091,358.7820820974708 -priorityencoder,sky90,128,3424,0.29467407476635515,1618.960031,588.849,124.05778547663552 -csa,tsmc28,16,24362,0.0409855330432641,104.832,960.623,43.48565055890321 -mux8,tsmc28,1,10,0.08395400000000564,5.04,17.289,0.0010275969600000692 -mux2d,tsmc28,1,51887,0.018931650182126544,3.276,26.574,0.9106123737602868 -mux8,tsmc28,8,5000,0.120592,30.114,99.721,3.5815824000000003 -mux8d,tsmc28,1,20620,0.04721960523763336,6.804,34.356,1.7754571569350146 -csa,sky90,64,1000,0.264181,940.799988,110.49,89.0025789 -add,tsmc28,32,10002,0.10404300399920016,214.074002,1100.0,88.33251039532094 -shiftleft,tsmc28,128,7445,0.14241033445265278,2676.617983,13800.0,812.0237270490261 -mux8,sky90,32,3092,0.33231726520051746,1062.320021,325.058,219.99402956274258 -csa,sky90,32,5740,0.16671402787456446,1160.320023,826.559,570.4954033867597 -priorityencoder,sky90,32,3458,0.2874674997108155,252.840005,73.577,24.808445225043375 -add,tsmc28,32,10195,0.10213029769494851,224.658002,1150.0,91.30448613928397 -shiftleft,sky90,128,2478,0.5244642510088782,10362.520144,7760.0,4248.6848974229215 -shiftleft,sky90,128,1869,0.5349974788657036,8478.960135,5130.0,2594.7377724986623 -mux4,tsmc28,1,27305,0.03808832906061161,5.292,41.928,1.7787249671305623 -mux2,tsmc28,16,20000,0.05247,15.372,84.373,5.294223 -priorityencoder,tsmc28,64,14613,0.07257421788818175,137.592,430.216,19.0870193045918 -mux8,tsmc28,32,10,0.16755700000000218,105.713999,407.748,0.03230498960000042 -flop,tsmc28,64,20286,0.04889208035098097,120.959999,630.0,216.05410307098495 -mult,tsmc28,64,2341,0.42716787697565145,14834.610092,97900.0,17120.461341307135 
-flop,sky90,32,3000,0.11434433333333333,533.119995,259.258,265.62188633333335 -comparator,tsmc28,128,9754,0.1048280422390814,597.366001,2680.0,141.0985448538036 -mux2d,sky90,1,14010,0.07565858743754461,19.6,18.562,6.400716497216275 -add,sky90,32,2225,0.449049202247191,1172.080023,416.347,447.7020546404495 -add,sky90,16,2207,0.4462947607612143,455.700009,91.288,163.79017719936564 -mux2,tsmc28,32,16471,0.060023767895088335,29.484001,155.318,9.477752950634446 -mux4,sky90,1,7844,0.12749297654258032,28.420001,24.639,6.463893910708823 -mux4,tsmc28,8,17868,0.05579297268860533,22.176,103.126,6.7565289925901055 -priorityencoder,tsmc28,128,10000,0.09996100000000001,204.498,547.044,26.589626000000006 -mux4,tsmc28,32,14561,0.0683906018817389,73.079999,333.732,23.799929454845135 -inv,sky90,1,25000,0.034255,2.94,3.973,1.08074525 -shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,101.80062296359652 -mux4,tsmc28,1,26255,0.03808798324128737,5.292,41.928,1.7101504475338032 -mux8,sky90,128,2666,0.37501377344336084,3761.240072,1460.0,854.281375903976 -priorityencoder,sky90,8,8634,0.12521217211026175,64.680001,49.589,15.82681855473709 -mux4,sky90,16,4274,0.2318758591483388,367.5,398.159,99.24286771548901 -csa,tsmc28,16,10,0.06758100000000411,34.271999,114.459,0.009738422100000593 -csa,sky90,16,5617,0.17789797739006588,580.160011,413.466,298.1570101057505 -add,sky90,8,3801,0.2758526608787161,257.740005,154.606,129.9266032738753 -priorityencoder,sky90,8,7977,0.12510041118214868,54.880001,38.166,11.97210935013163 -mux2d,tsmc28,1,52905,0.01893180512238919,3.276,26.574,0.9295516315093093 -add,sky90,16,3055,0.33029724222585927,678.160013,461.632,320.0580277168576 -add,tsmc28,32,10000,0.10397500000000001,213.696002,1120.0,89.62645 -priorityencoder,sky90,128,3244,0.30802940567200987,1532.720028,470.562,99.4934980320592 -add,tsmc28,8,30000,0.07291133333333333,40.698001,243.583,33.39339066666666 -flop,tsmc28,64,20000,0.048892000000000005,120.959999,630.0,212.997998 
-mux8,tsmc28,32,11452,0.08728999196646874,163.421999,716.958,42.51022608767027 -comparator,tsmc28,8,16010,0.06241596189881324,26.964,121.779,7.084211675515301 -mux2,sky90,64,4410,0.2421643696145125,487.060007,132.876,170.7258805782313 -add,sky90,32,2300,0.43468360869565215,1172.080022,411.51,420.7737332173913 -comparator,tsmc28,8,10,0.138023000000004,14.994,49.297,0.004761793500000138 -flop,tsmc28,8,10000,0.048892000000000005,15.12,78.6345,13.321114320000003 -flop,sky90,64,6996,0.1143438221841052,1066.23999,520.0,1238.5151099871355 -mux2,sky90,64,4500,0.2432312222222222,491.96001,109.223,164.66753744444446 -flop,sky90,8,7544,0.11434467338282078,133.279999,64.8145,167.02326441028632 -shiftleft,sky90,32,2635,0.3871506413662239,1704.220026,1140.0,488.1969587628084 -priorityencoder,tsmc28,16,20770,0.048116364949446315,22.806,79.733,4.065832838228213 -csa,tsmc28,8,26351,0.04022622393837046,52.416,489.192,23.210531212439758 -mux8,sky90,1,5443,0.18314221201543268,69.580001,39.989,11.684473126584605 -mult,sky90,32,10,8.063384999999997,12417.580067,1180.0,374.1410639999998 -shiftleft,sky90,128,10,2.072533000000007,4988.200097,311.979,19.813415480000064 -csa,tsmc28,128,5000,0.067577,274.175995,917.068,39.05950599999999 -add,sky90,16,3966,0.3380682173474534,711.480014,493.165,413.79549803328297 -mux8d,sky90,1,7854,0.13242765673542145,89.180001,45.284,16.672641982989564 -flop,sky90,128,10,0.1143419999999935,2132.4799805,1035.0,3.537741479999799 -csa,tsmc28,32,24860,0.04049226146419952,209.664,1930.0,87.34180797827837 -mux4,sky90,16,4750,0.25584431578947364,359.660007,120.182,89.80135484210524 -priorityencoder,tsmc28,32,10000,0.09431300000000001,31.374,89.513,4.706218700000001 -mux4,tsmc28,64,14495,0.0712843066574681,225.287996,1000.0,59.95010189893067 -add,tsmc28,128,10000,0.136438,980.532009,4830.0,506.86717000000004 -shiftleft,tsmc28,128,10,0.6244959999999935,621.683997,1970.0,1.3738911999999859 
-priorityencoder,sky90,32,7923,0.21766481761958853,376.320006,196.516,80.75364733686733 -shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2900.3935725432616 -priorityencoder,sky90,32,10000,0.21766400000000002,373.380006,197.593,102.30208 -mux8,sky90,32,1000,0.901778,822.220016,56.227,121.74003 -flop,tsmc28,32,1000,0.04889200000000005,60.4799995,314.5805,5.333139360000005 -csa,sky90,64,10,0.2641830000000027,940.799988,110.49,0.8900325270000092 -mux4,sky90,64,3921,0.3027099803621525,1673.840027,1300.0,540.6400249268044 -priorityencoder,tsmc28,64,12959,0.07709544802839725,118.692,338.228,16.02814364510379 -add,tsmc28,128,7660,0.1359693028720627,941.976009,4660.0,370.3803810234988 -priorityencoder,sky90,128,2824,0.3539966487252124,1485.680028,304.73,82.83521580169969 -add,sky90,16,3118,0.3282264092366902,736.960014,480.371,348.57644660936506 -mux4,tsmc28,128,5000,0.195368,225.539998,875.821,68.574168 -mux8d,tsmc28,1,23691,0.04558612198725254,20.664,171.151,7.161579764197374 -comparator,sky90,128,2798,0.366175141529664,2806.720054,989.569,387.04712459685487 -flop,sky90,16,8746,0.11434398307797851,266.5599975,129.629,387.1687267020352 -comparator,tsmc28,64,11501,0.09215196095991653,260.693999,1150.0,63.861308945222156 -priorityencoder,sky90,64,3244,0.3076284056720099,675.220013,189.065,46.667229140443894 -mux8d,sky90,1,10000,0.132428,91.140001,43.658,21.2017228 -priorityencoder,sky90,64,2937,0.3404264865509023,638.960011,137.801,47.898006657711946 -comparator,sky90,8,4550,0.21930021978021977,185.220003,114.347,39.58368967032966 -mux2,sky90,8,6033,0.1883510140891762,64.680001,21.541,21.94289314138903 -mux8,tsmc28,1,15680,0.06278251020408164,11.718,79.753,3.635107340816327 -flop,tsmc28,128,20286,0.04889208035098097,241.919998,1260.0,432.0837601017943 -mult,sky90,8,10000,0.771349,2152.08004,1430.0,11146.764399 -mux8,tsmc28,128,10464,0.09547474923547401,664.775992,2850.0,153.04602302446486 
-mux8,tsmc28,8,13159,0.07636861653621096,46.494,227.561,12.287710400676342 -mux4,sky90,128,3360,0.31526604761904764,2191.280043,879.483,666.1571586190477 -mux2,tsmc28,8,30227,0.03374500526019784,16.758,114.743,5.567925867932642 -mux2,tsmc28,32,15000,0.06414566666666667,30.240001,154.587,9.275463400000001 -mux4,tsmc28,64,14205,0.0709287472720873,145.655999,671.083,46.52925821048927 -mux8d,tsmc28,1,21498,0.04628595497255559,17.514,137.759,5.702429652618848 -mult,tsmc28,32,2791,0.35829251809387314,3917.214024,26100.0,4235.734148905768 -shiftleft,tsmc28,16,10,0.16585499999999342,43.722,137.701,0.02141188049999915 -flop,sky90,128,5000,0.11434400000000002,2132.4799805,1035.0,1770.3309800000002 -mux8,sky90,32,3079,0.3246647729782397,1050.56002,350.25,223.69402858200718 -mux4,sky90,8,4558,0.21942147125932426,136.220003,45.945,38.57429464738921 -csa,tsmc28,64,25000,0.040492,419.327999,3860.0,175.573312 -shiftleft,tsmc28,64,10000,0.12092800000000001,1066.337997,5680.0,359.51894400000003 -comparator,sky90,8,5000,0.21538000000000002,244.020004,169.251,67.62932 -mux2,tsmc28,16,5000,0.08561,14.49,74.123,2.0803230000000004 -flop,tsmc28,16,10,0.048889000000002625,30.24,157.29,0.026541838100001425 -mux4,tsmc28,128,1000,0.36021400000000003,213.569998,724.573,25.611215400000003 -mux2d,tsmc28,1,46800,0.021009521367521367,1.89,12.344,0.5670469817094017 -flop,sky90,16,8364,0.11434401912960306,266.5599975,129.629,370.2459339416547 -mux8,tsmc28,8,13981,0.07549864194263645,46.494,225.202,12.92536750057936 -mux2,sky90,32,5247,0.2190400962454736,237.160005,57.793,88.27315878692586 -csa,sky90,16,6594,0.1653620178950561,736.960014,439.259,348.0870476690931 -mux4,tsmc28,32,20000,0.065929,117.557998,490.72,36.854310999999996 -mult,sky90,128,1500,1.9212156666666664,309312.504297,146000.0,926604.237249 -add,tsmc28,8,10,0.25532499999999914,15.75,58.809,0.01820467249999994 -flop,sky90,32,8571,0.11434450029168125,533.119995,259.258,758.7329316854508 
-mux2,sky90,16,1000,0.606993,113.680002,19.729,23.4299298 -mux2,tsmc28,1,48835,0.01965811682195147,2.142,15.112,0.5779486345653732 -add,sky90,64,10,7.080673000000004,927.079988,110.998,23.005106577000014 -mux8,tsmc28,64,5000,0.19365200000000002,209.159999,779.626,48.993956000000004 -mult,sky90,64,683,1.5572078433382137,95922.401617,59200.0,114920.38163051684 -comparator,tsmc28,8,5000,0.12846600000000002,14.994,50.165,2.3689130400000007 -mux4,sky90,16,4537,0.2635969625303064,358.680007,100.453,94.8949065109103 -shiftleft,tsmc28,32,5000,0.19764500000000002,109.116,365.184,35.912096500000004 -mult,tsmc28,32,3034,0.3302978905735003,6116.670002,45200.0,6186.149192551086 -shiftleft,sky90,8,4673,0.22946229210357372,243.040005,191.378,70.21546138369355 -mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6375.600754155466 -flop,sky90,128,8920,0.11434462331838566,2132.4799805,1035.0,3158.198496053812 -comparator,tsmc28,16,13709,0.07280278080093369,48.132,204.944,11.852292714392004 -mux8,sky90,32,3155,0.3146512107765452,1237.740008,639.983,323.14679346751194 -comparator,sky90,32,3523,0.28283299233607717,713.440014,308.366,110.87053299574227 -priorityencoder,sky90,8,8954,0.12869292986374803,55.860001,32.802,13.770143495421042 -add,sky90,8,3950,0.26842455696202533,271.460005,148.135,130.99118379746835 -mult,tsmc28,128,1906,0.5298619716684155,47270.53821,285000.0,54324.09864530429 -priorityencoder,sky90,32,4871,0.21364865366454527,392.980008,269.469,55.33500129911722 -priorityencoder,tsmc28,32,17797,0.06230824537843457,56.826,201.644,9.720086279035794 -mux4d,sky90,1,10090,0.10307702775024777,51.940001,49.18,14.183399018434091 -priorityencoder,tsmc28,64,14061,0.07336369710546903,141.624001,436.895,19.441379732949294 -add,tsmc28,16,12197,0.08589137394441257,99.666001,538.564,39.853597510207436 -mult,sky90,32,1000,1.318548,26428.640468,17200.0,34487.941488 -priorityencoder,sky90,32,3661,0.2722724127287626,259.700005,52.373,23.252064047036328 
-add,sky90,32,1500,0.6659446666666666,815.360016,146.423,235.07846733333332 -priorityencoder,sky90,32,3254,0.30692107498463433,309.680005,71.079,26.456596663675477 -csa,sky90,8,5000,0.190168,188.160004,111.708,86.146104 -mux4d,sky90,1,9313,0.10687378513905293,53.900001,55.277,14.021840610243743 -mux2,sky90,128,4337,0.25870912958266085,1099.560018,533.373,434.8900468284529 -add,tsmc28,128,7810,0.13571597311139563,963.270009,4770.0,387.87625115236875 -mux8,sky90,8,3869,0.2781557195657793,312.620006,98.575,60.97173372881882 -add,tsmc28,32,1000,0.923854,67.157999,231.062,30.117640399999996 -comparator,sky90,16,4349,0.24600491676247413,415.520007,287.169,94.21988312002759 -priorityencoder,tsmc28,8,5000,0.057811,4.284,14.558,0.24858730000000004 -mux8,sky90,128,2794,0.36125880672870436,4775.54006,2600.0,1173.368604254832 -mux4,sky90,128,3037,0.3291043081988805,1765.960034,549.903,537.7564395969706 -mult,sky90,32,794,1.3013988438287152,26529.580464,17000.0,27460.817003629716 -csa,tsmc28,8,10,0.06758100000000411,17.136,57.229,0.004872590100000297 -mux2,tsmc28,128,15917,0.06280590940503863,162.792,942.721,50.181921614625864 -priorityencoder,sky90,16,5102,0.19346356801254408,133.280003,47.464,16.173554285848684 -shiftleft,sky90,16,3489,0.3163680759529951,644.840009,367.22,204.37377706563484 -shiftleft,sky90,128,2000,0.534484,9473.660144,6430.0,3263.559304 -flop,tsmc28,32,21942,0.04889169692826543,60.4799995,314.581,116.85848941309362 -comparator,sky90,64,3252,0.3233870750307503,1354.360026,435.572,195.00240624354242 -priorityencoder,sky90,64,9249,0.24122679673478215,907.480017,482.887,216.1392098743648 -mux8d,sky90,1,20000,0.13242700000000002,88.200001,45.846,40.25780800000001 -mux8,sky90,1,10,0.27352899999999636,34.300001,4.606,0.01665791609999978 -mux8d,tsmc28,1,21059,0.04721963559523244,6.804,34.408,1.7990681161783562 -add,tsmc28,128,7059,0.14164812508853947,818.748007,3800.0,317.716744573594 
-priorityencoder,sky90,16,5983,0.1693952306535183,166.600002,113.125,29.33925394918937 -mux4,tsmc28,32,10000,0.09749100000000001,56.825999,208.953,16.5929682 -priorityencoder,sky90,32,10,0.8791959999999932,196.980004,12.728,0.1043605651999992 -comparator,tsmc28,16,10000,0.09940700000000001,34.02,138.933,7.6245169000000015 -priorityencoder,tsmc28,8,29464,0.033932723051859896,8.316,36.118,1.8086141386641326 -mux2d,tsmc28,1,54939,0.01893200586104589,3.276,26.574,0.9655322989133405 -shiftleft,tsmc28,32,10566,0.1009131951542684,429.534,2150.0,122.20587933181902 -mux4d,tsmc28,1,31201,0.032016254799525655,3.528,24.177,1.0792679492920096 -flop,sky90,8,8528,0.11434378799249531,133.279999,64.8145,188.77587678621015 -mux2,sky90,8,1000,0.415161,58.800001,10.206,7.41477546 -priorityencoder,tsmc28,8,33147,0.03304764271276436,11.34,48.981,2.296811168537123 -shiftleft,sky90,128,1000,0.995988,5011.720098,428.194,865.513572 -priorityencoder,sky90,128,3340,0.2993901976047904,1706.180032,712.585,138.61766149101797 -mux2,tsmc28,64,20478,0.052974893837288796,87.318,456.882,26.80529628166813 -mux8,tsmc28,32,13157,0.08719616835144789,265.104,1290.0,64.00198756996275 -priorityencoder,sky90,8,3000,0.2659083333333333,34.300001,2.992,3.906193416666666 -priorityencoder,tsmc28,16,15000,0.06552166666666667,13.86,43.042,2.463614666666667 -add,sky90,16,3000,0.3504503333333333,648.760012,358.879,285.61702166666663 -shiftleft,tsmc28,8,15487,0.06589728475495576,44.856,300.094,14.655556129502163 -priorityencoder,sky90,64,2643,0.3757099265985623,594.860012,108.4,40.802098028603865 -priorityencoder,tsmc28,64,13234,0.07554394393229559,126.504,374.222,16.672548425857634 -shiftleft,tsmc28,16,11800,0.0847177627118644,130.788,613.549,33.71766955932203 -priorityencoder,tsmc28,64,30000,0.07325033333333333,143.136001,460.222,41.38643833333333 -mux8,tsmc28,16,10000,0.097831,59.094,224.879,13.490894900000002 -add,sky90,16,2357,0.42127313746287653,461.580009,93.497,167.66670871022487 
-add,tsmc28,16,11520,0.08680155555555555,90.972001,475.452,33.67900355555555 -priorityencoder,sky90,64,3187,0.3126177097583935,644.840012,169.713,45.82975625058049 -add,sky90,32,2626,0.41115831150038085,1413.160026,589.009,610.9812508895659 -mux4,sky90,1,10,0.22259499999999832,13.72,1.708,0.007234337499999945 -priorityencoder,sky90,8,8323,0.12521198474107895,62.720001,47.365,14.537111428439268 -mux2,sky90,8,5720,0.18835117482517483,64.680001,21.541,20.812804818181817 -comparator,sky90,64,2822,0.3532796109142452,1487.640029,401.686,214.08744421403262 -mux8,tsmc28,16,12506,0.08081961842315688,99.036,498.654,27.559489882296493 -shiftleft,tsmc28,64,7939,0.12624244841919638,968.687991,4910.0,278.995811006424 -priorityencoder,sky90,64,3184,0.313698351758794,643.860013,144.409,42.913934520603014 -flop,sky90,16,10000,0.114344,266.5599975,129.629,442.68279600000005 -priorityencoder,tsmc28,64,13510,0.07461124500370096,136.836,429.547,18.055921290895633 -csa,tsmc28,128,22871,0.04244549263259149,499.96801,3850.0,214.01017385352628 -mux4,tsmc28,16,17125,0.06054816058394161,41.454,196.466,13.381143489051095 -flop,sky90,128,6996,0.1143438221841052,2132.4799805,1035.0,2476.9158761520866 -mux8,sky90,1,6804,0.16910436919459143,74.480001,55.304,14.965736673721342 -csa,tsmc28,64,20000,0.047873000000000006,233.856003,1670.0,85.40543200000002 -mult,sky90,32,810,1.3021829012345678,26464.900467,16900.0,27144.002576234565 -mux2,tsmc28,8,33000,0.033745030303030304,16.758,114.743,6.060607442424242 -mux2,tsmc28,32,5000,0.171662,28.728001,146.078,8.411438 -add,sky90,64,2560,0.46298300000000003,2989.000058,1130.0,1192.6442080000002 -mux2,sky90,128,4240,0.2622730566037736,1283.800021,782.334,509.3342759245284 -flop,tsmc28,32,20286,0.04889208035098097,60.4799995,314.5785,108.02705153549248 -mux2d,tsmc28,1,40000,0.024459,1.89,12.272,0.55424094 -add,tsmc28,64,8872,0.1220601568981064,466.452004,2410.0,195.66243150766454 
-priorityencoder,sky90,64,3845,0.2600740234070221,718.340014,247.528,55.3437521810143 -csa,tsmc28,8,23368,0.04244556384799726,31.248001,240.54,13.735384461211915 -flop,tsmc28,16,19226,0.04889189919900135,30.24,157.289,51.19959684119423 -mux2,sky90,128,1000,0.986012,882.000017,156.733,360.880392 -inv,tsmc28,1,80000,0.009913000000000002,0.252,1.094,0.09001004000000001 -mux4,tsmc28,16,18152,0.06084534817100044,40.824,196.722,14.176966123843101 -mux2,sky90,16,4537,0.21640196253030636,114.660002,40.216,41.87377974961428 -mult,tsmc28,32,10000,0.32958600000000005,6886.152,51900.0,22272.433122000002 -mux4,sky90,8,1000,0.399328,116.620002,9.077,9.50001312 -csa,sky90,32,5000,0.190168,752.640015,465.518,347.817272 -mult,tsmc28,128,10,8.397948999999997,26059.445743,91700.0,2157.4330980999994 -shiftleft,sky90,8,10,0.705528000000001,130.340003,10.712,0.17243104320000024 -shiftleft,tsmc28,64,8600,0.12059506976744186,1165.877997,6270.0,333.32477283720937 -mux4,tsmc28,16,16440,0.06065625060827251,39.438,185.149,12.373875124087593 -mux8,tsmc28,128,10,0.39436299999999846,411.011997,1610.0,0.3363916389999987 -flop,tsmc28,16,1000,0.04889200000000005,30.24,157.29,2.6543466800000024 -shiftleft,sky90,64,2248,0.44940285765124555,4417.840063,3480.0,1354.9496158185052 -mux2,tsmc28,16,10000,0.08561,14.49,74.123,4.1520850000000005 -add,sky90,8,3577,0.2783518803466592,248.920005,158.724,124.70164239530332 -shiftleft,sky90,32,1000,0.973356,882.980017,61.502,141.23395560000003 -add,tsmc28,64,10000,0.11947100000000001,441.378003,2170.0,199.87498300000001 -mult,sky90,8,1364,0.7568698299120235,2283.400042,1530.0,1662.8430163167156 -priorityencoder,tsmc28,128,15000,0.08566866666666667,327.096,1080.0,56.19864533333333 -mult,tsmc28,16,4209,0.26194512497030176,1826.496002,13500.0,1725.6944833043478 -priorityencoder,sky90,128,3287,0.304158780042592,1530.760028,528.644,113.14706617584422 -priorityencoder,sky90,32,2960,0.33683483783783785,282.240005,55.579,21.557429621621623 
-mux4,sky90,32,3650,0.271347602739726,459.620008,136.481,139.20132020547945 -mux4,tsmc28,32,16381,0.06514933416763323,89.837999,413.021,27.558168352908854 -mux2,sky90,64,3640,0.26519527472527477,453.740009,157.765,163.6254845054945 -priorityencoder,sky90,128,10000,0.292065,1726.760031,791.829,457.081725 -csa,sky90,16,3000,0.2641803333333333,235.199997,27.622,66.91687843333332 -mux8,tsmc28,16,13006,0.07773759034291866,186.101998,913.855,37.85820649700139 -mux4,sky90,64,3995,0.2894468911138924,1482.740026,933.075,413.9090542928661 -mult,tsmc28,16,3663,0.27299327300027304,1326.528005,9230.0,1224.6478226792249 -mux8,tsmc28,128,11532,0.09583922719389526,723.24,3620.0,201.93325169753734 -flop,tsmc28,16,20863,0.048891745194842545,30.24,157.289,55.563023826678815 -comparator,tsmc28,32,13127,0.08809986798202178,147.168,652.42,41.31883808356821 -add,sky90,64,2327,0.46050285990545764,2953.720057,1160.0,1186.2553671164587 -flop,tsmc28,64,21528,0.04889213340765514,120.959999,630.0,229.3041056819026 -add,sky90,32,1928,0.5185401991701245,1050.56002,294.688,355.2000364315353 -comparator,sky90,16,3930,0.2596389262086514,405.720007,196.545,71.1410657811705 -priorityencoder,sky90,16,6443,0.1666292016141549,185.220003,132.902,35.608660384944905 -flop,sky90,64,8920,0.11434462331838566,1066.23999,520.0,1579.2707649618835 -flop,sky90,32,6996,0.1143438221841052,533.119995,259.258,619.3433128602057 -mult,tsmc28,8,5403,0.19335536165093467,543.06,3920.0,373.75591407125677 -add,sky90,64,2500,0.473711,3024.280058,1280.0,1417.817023 -csa,tsmc28,64,10000,0.067577,137.087997,458.434,38.99192899999999 -priorityencoder,sky90,16,6121,0.16939499803953603,166.600002,113.104,29.864338154370206 -priorityencoder,sky90,8,8284,0.12521163061323032,62.720001,47.365,14.461943335828103 -mux4,tsmc28,16,18495,0.06128766720735333,43.218,210.716,14.7090401297648 -comparator,tsmc28,128,9371,0.10671119720414043,558.432,2400.0,127.9467254477644 
-mult,tsmc28,32,2913,0.34328370580157913,4543.056018,31800.0,4923.031624900447 -comparator,tsmc28,32,15000,0.08075166666666667,150.444001,791.489,44.574920000000006 -add,tsmc28,8,13224,0.07517108469449485,32.004,169.208,12.14764728663037 -mux8,tsmc28,32,5000,0.190888,105.713999,390.932,24.128243200000004 -mux2d,tsmc28,1,49852,0.019658375752226592,2.142,15.112,0.5897512725667977 -shiftleft,tsmc28,16,5000,0.16564500000000001,43.722,136.668,11.4791985 -mult,tsmc28,32,20000,0.329665,6679.259997,50100.0,43685.227810000004 -mux2,sky90,128,4080,0.2551630392156863,1146.600021,479.203,402.6472758823529 -mux8,sky90,1,6900,0.16910453623188407,75.460001,55.309,15.62525914782609 -priorityencoder,sky90,128,3479,0.3124409192296637,1500.380028,438.268,107.47967621500432 -mux4d,sky90,1,8925,0.11149681792717087,50.960001,49.104,13.535713696358544 -mult,tsmc28,128,1753,0.5704506560182544,39039.588169,211000.0,41421.56303479749 -flop,sky90,64,10000,0.114344,1066.23999,520.0,1770.3881519999998 -priorityencoder,tsmc28,64,10,0.4980240000000009,55.313999,146.464,0.03964271040000007 -comparator,sky90,128,3471,0.3532324116969173,2889.040053,1200.0,468.0329454984155 -flop,tsmc28,32,10,0.048889000000002625,60.4799995,314.5805,0.05332812120000287 -priorityencoder,sky90,64,5000,0.242601,939.820016,513.255,114.26507099999999 -flop,tsmc28,128,21942,0.04889169692826543,241.919998,1260.0,467.38017678575335 -comparator,tsmc28,32,6604,0.15116637976983646,80.387999,260.149,19.77256247389461 -shiftleft,tsmc28,32,10,0.2576830000000001,108.108,362.436,0.08039709600000003 -mux4d,sky90,1,9119,0.1094081470555982,48.020001,46.22,13.041451129027305 -mux8,tsmc28,1,17787,0.06011083544161466,9.072,57.908,2.9875085214482486 -comparator,tsmc28,8,18794,0.05556247078854954,42.966,238.002,11.951487466617007 -priorityencoder,tsmc28,16,22501,0.04496246922359006,30.114,121.215,5.462940010666192 -comparator,sky90,8,4272,0.2326913970037453,157.780003,66.963,29.85430623558052 
-mult,sky90,8,1390,0.7671824604316546,2239.300042,1460.0,1632.564275798561 -flop,sky90,8,8920,0.11434462331838566,133.279999,64.8145,197.4960333955157 -mult,tsmc28,128,5000,0.52473,50418.144217,312000.0,152930.45958 -flop,sky90,8,7708,0.11434433990659056,133.279999,64.8145,170.64749287659575 -inv,tsmc28,1,150000,0.008271666666666667,0.378,2.478,0.15641721666666666 -comparator,sky90,128,2973,0.34142357854019506,2936.080056,1160.0,409.7082942482341 -mux4,tsmc28,8,17020,0.05874140658049354,19.404,82.944,5.668545735017626 -mux2,sky90,16,5167,0.20207390090961871,119.560002,32.354,40.51581713237856 -mux2,sky90,8,5170,0.1926155976789168,70.560001,12.959,18.722236094390713 -mux8,sky90,32,3016,0.32768098673740054,1029.980004,375.119,244.45001610610078 -flop,sky90,64,3000,0.11434433333333333,1066.23999,520.0,531.1294283333333 -mux8,sky90,8,1000,0.535445,234.220005,15.482,17.61078605 -mult,sky90,16,1017,1.006137169124877,7719.460138,5170.0,6970.518307697148 -add,sky90,16,2507,0.3986071272437176,500.78001,170.515,191.73002820422815 -csa,sky90,8,6106,0.16536133770062234,330.260006,208.877,156.43182546478874 -flop,sky90,32,15822,0.11434413487548983,533.119995,259.258,1400.6584801573129 -mux2,tsmc28,32,1000,0.17166199999999998,28.728001,146.078,1.68057098 -csa,tsmc28,64,25357,0.04036684189770083,403.199997,3800.0,174.5058575237607 -add,sky90,32,1847,0.5378555165132648,924.140018,210.187,308.1912109621008 -mult,sky90,32,841,1.2987606420927467,27230.280486,18100.0,31555.987320927466 -flop,sky90,8,7872,0.11434452032520326,133.279999,64.8145,174.2724834276423 -csa,sky90,64,6106,0.16536133770062234,2597.000051,1650.0,1245.6669568987882 -mux2,tsmc28,8,10000,0.08170100000000001,7.56,38.026,1.9689941000000004 -shiftleft,tsmc28,32,9196,0.10872793170943888,270.899999,1350.0,75.2397287429317 -mux4,sky90,128,3166,0.3157249696778269,1984.500039,725.267,569.5678452987997 -mux4,tsmc28,8,10,0.07530199999999354,15.12,54.869,0.0026807511999997697 
-csa,tsmc28,16,5000,0.067577,34.271999,114.459,4.8723016999999995 -comparator,sky90,128,2856,0.3762330560224089,2648.940052,834.031,344.6294793165266 -inv,tsmc28,1,20000,0.014172000000000004,0.252,1.005,0.029307696000000008 -shiftleft,sky90,16,3623,0.31161835274634275,610.540011,408.291,192.26852364449348 -add,tsmc28,64,8203,0.12186861952944045,392.616003,1800.0,142.34254761038645 -priorityencoder,sky90,32,3081,0.32202494482310934,268.520005,41.654,18.38762434939954 -flop,tsmc28,64,20700,0.04889217874396135,120.959999,630.0,220.47928004589374 -priorityencoder,sky90,32,4848,0.21353962706270627,415.520008,277.119,61.4994125940594 -mux2,tsmc28,128,17610,0.05977891709256105,201.347999,1400.0,70.65868000340716 -mux8,sky90,64,2959,0.33729801081446437,2162.860042,711.961,441.8603941669483 -add,sky90,128,2010,0.5301124378109453,6693.40013,2760.0,2484.1068835820897 -flop,tsmc28,16,5000,0.04889200000000002,30.24,157.29,13.320625400000006 -priorityencoder,sky90,64,3062,0.3258939320705421,635.040012,145.835,43.7023762906597 -mux8,sky90,32,3282,0.3215692608165752,1068.20002,336.387,234.42399113528336 -shiftleft,tsmc28,16,11555,0.08623462224145391,123.984,583.737,33.286564185201215 -mux4d,tsmc28,1,33236,0.03008085654110001,4.158,30.464,1.2781355944313395 -csa,sky90,128,6472,0.16536174289245983,5996.620117,3450.0,2713.586200865266 -flop,sky90,128,8046,0.11434435918468804,2132.4799805,1035.0,2848.661020368133 -mux8d,sky90,1,8005,0.1324279237976265,89.180001,45.311,17.030231000374766 -mux4d,sky90,1,30000,0.10307733333333333,51.940001,49.18,42.15862933333334 -mult,tsmc28,8,5195,0.19249278152069296,564.858,4230.0,369.0086621751684 -csa,tsmc28,16,24860,0.04049226146419952,104.832,964.99,43.73164238133548 -add,tsmc28,8,12943,0.07722184037703778,31.626,164.58,11.946218706327745 -mux4,tsmc28,64,20000,0.068989,205.883996,807.791,70.161813 -comparator,sky90,64,2000,0.469373,1392.580027,201.764,124.4777196 -mult,sky90,64,645,1.552430596899225,92630.581541,51300.0,97807.78489644187 
-shiftleft,tsmc28,16,11309,0.08835714811212309,95.003999,416.622,27.125644470421786 -mux8d,sky90,1,6948,0.14063830972941851,110.740002,90.7,24.38668290708117 -mux2,tsmc28,32,16829,0.058776237150157465,29.736001,158.536,9.568771408045636 -priorityencoder,tsmc28,16,20000,0.049953000000000004,17.514,56.066,3.2519403000000007 -mux2,sky90,128,5000,0.23978300000000002,1465.100028,826.847,544.7869760000001 -priorityencoder,tsmc28,128,10972,0.0911070864017499,263.466001,738.366,32.61633693182647 -inv,sky90,1,20000,0.045664,1.96,1.986,1.04387904 -csa,sky90,32,1000,0.264181,470.399994,55.245,44.4880804 -mux4,sky90,8,5000,0.210603,159.740002,79.159,44.1423888 -add,tsmc28,128,8548,0.1331574295741694,1008.37801,5090.0,444.2131850594291 -shiftleft,sky90,16,3000,0.33238233333333334,585.060009,308.114,154.557785 -comparator,sky90,16,4014,0.24886605181863478,355.740006,188.666,62.714245058295965 -mux4,tsmc28,8,15000,0.06615766666666667,15.624,57.62,4.439179433333333 -priorityencoder,sky90,64,3003,0.332536333000333,587.020011,97.483,33.8521986994339 -priorityencoder,sky90,8,7517,0.13222679459890913,47.040001,18.57,10.221131222495675 -add,sky90,8,10,1.0608979999999946,103.879999,14.319,0.39889764799999794 -mux4,tsmc28,8,5000,0.08076000000000001,15.12,51.529,1.6515420000000003 -mux8d,sky90,1,7401,0.13412587609782461,85.260001,43.581,15.357412813200918 -mux8,tsmc28,64,10780,0.09259537847866418,298.745999,1470.0,84.35438979406308 -shiftleft,sky90,64,2293,0.4567638996947231,3589.740062,2130.0,988.8938428390754 -mux8,sky90,1,6678,0.1691044327643007,74.480001,55.304,14.695175207217732 -add,tsmc28,64,7701,0.12983626580963512,338.436001,1530.0,123.8637975823919 -flop,sky90,32,10,0.1143419999999935,533.119995,259.258,0.88306326599995 -add,sky90,128,1924,0.5200035197505197,6312.180123,2420.0,2192.334839268191 -priorityencoder,tsmc28,128,12140,0.08441532289950576,335.790001,1110.0,46.09076630313015 -add,sky90,8,3652,0.2733695629791895,245.000005,139.276,101.6934774282585 
-csa,tsmc28,8,22871,0.04244549263259149,31.248001,240.54,13.442487516741723 -comparator,tsmc28,16,14280,0.0725140112044818,61.992,277.523,16.37366372997199 -mux4d,sky90,1,9896,0.10307692966855295,51.940001,49.18,13.905077812287791 -flop,tsmc28,16,21942,0.04889169692826543,30.24,157.29,58.442689923202074 -mux8,sky90,32,10,0.9133680000000055,822.220016,56.003,1.2659280480000077 -priorityencoder,sky90,16,6213,0.16662884081764043,185.220003,132.902,34.19223813577982 -mux4,tsmc28,128,12287,0.0813348316106454,285.641997,1290.0,84.26288554862863 -mux2,sky90,64,3500,0.2690062857142857,453.740009,157.765,159.78973371428572 -shiftleft,tsmc28,128,15000,0.14268966666666666,2370.311985,11900.0,1477.1234293333332 -mux4,sky90,64,3698,0.2703524413196322,875.140017,296.755,274.40772793942665 -shiftleft,tsmc28,8,13969,0.07110908568974156,27.468,148.616,8.291319391423865 -mux2,tsmc28,1,50870,0.01965795164143896,2.142,15.112,0.6034991153921762 -add,tsmc28,8,14349,0.07012526768415918,37.674,209.478,14.270491973726394 -mux4,sky90,128,3231,0.3096037022593624,2019.780039,760.934,619.8266119232435 -comparator,tsmc28,64,10010,0.09986309990009991,225.414001,913.668,51.729085748251755 -csa,tsmc28,128,23368,0.04244556384799726,499.96801,3850.0,218.6795449448819 -mux8,sky90,128,2737,0.37361253671903544,4183.620077,1520.0,872.0116607022287 -priorityencoder,sky90,16,5641,0.1771795330615139,142.100003,78.715,24.681108955468886 -mux8,tsmc28,32,11695,0.08537362676357418,168.21,815.694,46.35787933262078 -mult,sky90,16,917,1.0904565408942202,6101.480106,2770.0,4877.612107419846 -mux4,sky90,16,4359,0.22812741523285157,359.659999,419.781,99.919807871989 -flop,sky90,32,8046,0.11434435918468804,533.119995,259.258,712.2510133614218 -add,sky90,16,3305,0.32682486081694406,772.240014,605.439,437.945313494705 -comparator,tsmc28,128,10000,0.104577,536.004,2400.0,125.17866900000001 -mux4,sky90,64,10,1.1701199999999972,834.960016,55.508,2.4537416399999943 
-comparator,sky90,64,1000,0.968153,1008.42002,127.665,80.84077549999999 -priorityencoder,sky90,32,2779,0.35866266966534727,252.840004,35.176,18.75805762349766 -mux2,sky90,64,4474,0.24442363433169426,564.480008,225.034,208.9822073535986 -mux4d,sky90,1,10478,0.10307706069860662,51.940001,49.18,14.729711973830884 -priorityencoder,sky90,32,4557,0.2226836157559798,389.060007,217.297,51.818477386416504 -priorityencoder,tsmc28,64,5000,0.199836,68.292,191.082,9.532177199999998 -mux2,tsmc28,16,19150,0.05221932114882506,15.75,88.448,5.1592689295039165 -shiftleft,tsmc28,16,1000,0.166466,43.722,137.066,2.20068052 -mux2,sky90,64,3811,0.26210032065074784,458.640009,159.734,169.05470681973236 -flop,tsmc28,8,30000,0.048891333333333335,15.12,78.635,39.963775866666666 -mux4,tsmc28,64,10000,0.09903100000000001,111.887999,417.29,35.453098000000004 -flop,sky90,16,8396,0.11434433539780849,266.5599975,129.629,371.6190900428776 -priorityencoder,tsmc28,16,10000,0.09986,13.608,39.317,2.636304 -mux8d,tsmc28,1,22814,0.045585734285964755,20.664,171.151,6.865211583466293 -priorityencoder,sky90,32,3262,0.30541339239730225,301.840006,59.21,29.502933705579395 -priorityencoder,sky90,8,7210,0.12694025520110958,48.020001,27.514,9.761705624965325 -priorityencoder,sky90,64,1000,0.995802,466.480009,42.972,27.7828758 -mux8,sky90,64,2939,0.3408547863218782,2212.840042,758.342,464.92592854304183 -add,sky90,32,2075,0.4813427108433735,1103.480021,324.051,396.6263937349398 -mux2,sky90,64,3780,0.26093526455026456,464.520009,160.416,167.7813751058201 -add,sky90,8,3374,0.2961831138114997,237.160004,135.713,118.47324552459989 -csa,sky90,64,5862,0.16922024223814397,2685.200052,1880.0,1305.1957283828044 -mux4,tsmc28,64,13335,0.07477862617172853,136.961999,613.587,43.89505356280465 -flop,tsmc28,32,5000,0.04889200000000002,60.4799995,314.5805,26.641250800000012 -shiftleft,sky90,32,2528,0.40100562025316455,1633.660028,1050.0,453.9383621265823 -mux8,sky90,8,3358,0.2968803073257892,279.300005,100.754,54.32909624061942 
-mux2,sky90,32,4775,0.2055040837696335,369.460007,247.031,131.93362178010472 -csa,tsmc28,128,5000,0.067577,274.175995,917.068,39.05950599999999 -mux4,sky90,16,4096,0.241506625,237.160005,79.796,71.1720023875 -mux2,tsmc28,8,27856,0.03986390867317634,11.466,61.225,3.5080239632395176 -csa,tsmc28,128,10,0.06758100000000411,274.175995,917.068,0.07812363600000474 -mux8,sky90,8,3504,0.2822301278538813,245.980005,76.481,48.68469705479452 -priorityencoder,sky90,8,7515,0.1322261989354624,47.040001,18.57,10.221085177711243 -mux4d,tsmc28,1,50000,0.029486,6.426,53.388,2.8866793999999993 -mux2,tsmc28,16,18296,0.053569755574989074,15.246,84.024,4.9659163418014876 -mux4,sky90,16,4714,0.2302920687314383,483.140009,324.67,136.33290468901146 -mux8,sky90,1,5336,0.1838852968515742,72.520001,39.389,14.10400226851574 -csa,tsmc28,32,20000,0.047873000000000006,116.928001,833.393,42.65484300000001 -mux4,tsmc28,16,5000,0.10381800000000001,28.224,95.979,4.1631018 -add,sky90,32,2150,0.4642472790697674,1167.180023,420.572,452.1768498139534 -flop,tsmc28,32,19226,0.04889189919900135,60.4799995,314.5785,102.39674908742847 -csa,sky90,8,5862,0.16847624223814398,282.240005,197.88,142.19394844899352 -mux2,sky90,64,3290,0.289785367781155,446.880009,124.22,152.42710345288754 -flop,sky90,8,8046,0.11434435918468804,133.279999,64.8145,178.13135995586626 -flop,tsmc28,16,20286,0.04889208035098097,30.24,157.289,54.02574878783397 -mux2d,sky90,1,12160,0.07841884210526316,18.62,16.576,5.646156631578949 -csa,tsmc28,128,1000,0.067577,274.175995,917.068,7.8119012 -mux4,tsmc28,64,13625,0.07338049541284404,142.001999,663.723,46.22971211009175 -comparator,sky90,8,1000,0.767356,118.580002,17.625,10.62020704 -mux2,tsmc28,16,1000,0.08560999999999996,14.49,74.123,0.4152084999999998 -mux8,sky90,1,5230,0.18843158891013384,71.540001,35.829,11.268209016826004 -flop,sky90,32,16952,0.11434408966493628,533.119995,259.258,1500.7090048074563 -mux4,tsmc28,128,5000,0.195368,225.539998,875.821,68.574168 
-priorityencoder,sky90,64,4514,0.24162600841825432,876.120015,455.06,91.09300517368187 -mux4,sky90,1,7687,0.12838276193573567,28.420001,22.994,6.3164318872381955 -shiftleft,sky90,32,2000,0.498251,1005.48002,200.12,204.28291000000002 -priorityencoder,sky90,16,5204,0.1921378770176787,115.640002,53.779,16.58149878662567 -priorityencoder,sky90,128,3629,0.2911510049600441,1793.400033,746.947,160.13305272802424 -priorityencoder,sky90,128,3150,0.31717431746031743,1637.58003,597.803,124.01515812698412 -shiftleft,tsmc28,32,10370,0.10147801542912246,377.495999,1870.0,110.10364674059788 -mux2,tsmc28,32,17545,0.05664229524080935,30.366001,167.377,9.844430912852664 -csa,tsmc28,128,15000,0.06424366666666667,274.175995,918.863,112.169442 -priorityencoder,tsmc28,16,20337,0.049093460884102866,18.522,62.2,3.4070861853567393 -mux8,sky90,16,3210,0.3112004797507788,895.720004,626.093,217.84033582554517 -mult,tsmc28,64,10000,0.414733,18453.708023,128000.0,88056.11056 -flop,sky90,32,8364,0.11434401912960306,533.119995,259.258,740.4346958737447 -mux2,sky90,16,3680,0.2702771304347826,119.560002,21.04,37.05499458260869 -mux4,sky90,64,3551,0.29908081385525204,1049.58002,327.447,295.19276327513376 -csa,sky90,32,6594,0.1653620178950561,1473.920029,878.519,694.5204751592355 -shiftleft,sky90,8,4762,0.23633680008399832,252.840005,209.827,83.8995640298194 -mult,sky90,8,3000,0.7607253333333333,2160.900039,1510.0,3393.595712 -csa,tsmc28,64,24362,0.0409855330432641,419.327999,3840.0,173.57373243822343 -priorityencoder,sky90,8,6750,0.14314514814814813,51.940001,27.647,9.948587796296296 -mux4,sky90,64,3000,0.3277983333333333,867.300017,244.717,230.44222833333333 -add,sky90,32,2420,0.4132191404958678,1330.840024,582.809,520.6561170247934 -flop,sky90,64,8046,0.11434435918468804,1066.23999,520.0,1424.4448545432513 -mux4,tsmc28,64,13915,0.07180589399928135,137.465999,648.086,45.59674268954365 -mux4,sky90,64,3489,0.29027207595299515,1076.04002,336.513,289.11098764918313 
-mult,sky90,64,658,1.5491178389057751,94424.961593,55600.0,102855.22803198785 -priorityencoder,sky90,64,2750,0.36301836363636364,600.740011,92.366,39.31488878181818 -comparator,sky90,128,3000,0.3430833333333333,3056.620058,1270.0,457.3300833333332 -mux2,sky90,32,3000,0.3009513333333333,223.440004,67.032,72.37879566666665 -mult,tsmc28,8,5299,0.1947598518588413,533.484001,3980.0,362.83760401302135 -mux4,sky90,16,4123,0.24214683846713558,291.060006,99.813,71.9660403924327 -mux4,sky90,8,4465,0.22174016573348265,122.500002,39.907,34.901902086450164 -priorityencoder,sky90,16,6361,0.16662898616569724,185.220003,132.902,35.258693472661534 -flop,sky90,128,1000,0.114344,2132.4799805,1035.0,354.066196 -flop,sky90,64,10495,0.1143444683182468,1066.23999,520.0,1858.0404379373515 -mult,sky90,16,1000,1.00493,7795.900143,5200.0,6776.242990000001 -add,tsmc28,64,10,1.836310999999995,135.701998,459.885,1.215637881999997 -mux8,tsmc28,64,11000,0.0907930909090909,304.037999,1490.0,81.89536799999999 -shiftleft,tsmc28,8,10000,0.09206,15.876,52.724,4.78712 -add,sky90,16,1500,0.6662086666666667,349.860007,57.567,99.59819566666668 -csa,sky90,8,1000,0.264181,117.599998,13.811,11.174856299999998 -mux2,sky90,128,4000,0.249989,1274.980022,771.288,452.73007899999993 -mult,tsmc28,64,10,4.225143000000003,6729.029936,23600.0,256.04366580000016 -add,tsmc28,32,5000,0.18802200000000002,110.880001,525.638,37.6796088 -mux2,sky90,32,5182,0.219040685063682,237.160005,57.793,87.17819265534543 -comparator,sky90,128,2000,0.486496,2820.440054,453.463,266.599808 -mux4,sky90,1,8314,0.12749304738994469,28.420001,25.098,6.884624559057013 -flop,tsmc28,8,19872,0.04889206119162641,15.12,78.634,26.46771732608696 -mux8,tsmc28,16,13507,0.08050368520026653,88.956,425.327,25.439164523284227 -mux4,sky90,16,4808,0.2558436888519135,359.660007,120.182,90.8245095424293 -priorityencoder,tsmc28,8,1000,0.057810999999999946,4.284,14.558,0.04983308199999995 -csa,tsmc28,8,15000,0.06424366666666667,17.136,57.429,7.047530233333334 
-mux4,sky90,64,3625,0.27763306896551726,1093.68002,343.331,292.0699885517242 -add,sky90,64,2317,0.48082057660768235,2909.620057,1150.0,1164.5474365438067 -flop,sky90,32,9795,0.11434390454313426,533.119995,259.258,867.1270001028586 -mux8,tsmc28,8,10000,0.097916,30.114,107.344,6.452664400000001 -csa,tsmc28,128,26351,0.04022622393837046,838.655994,7830.0,369.4778668739327 -mux8,tsmc28,1,16320,0.05991150980392156,7.182,38.342,1.8428780415686272 -mux2,sky90,16,5000,0.202074,119.560002,32.354,39.2629782 -mux4,tsmc28,32,14864,0.06722264155005382,81.143999,400.21,26.14960756297094 -mux2,sky90,64,4087,0.2460982481037436,482.160008,124.422,156.51848579398091 -mult,tsmc28,16,4131,0.2579261374969741,1955.772,14600.0,1769.1153770917451 -priorityencoder,sky90,32,3593,0.2755359535207348,251.860005,67.081,22.208197853771225 -mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1420.996059801527 -add,tsmc28,128,10,3.638641000000007,272.789995,918.55,4.8721402990000096 -shiftleft,sky90,32,2581,0.3915127260751647,1663.06003,1070.0,456.8953513297172 -shiftleft,tsmc28,32,1000,0.27288,108.108,356.322,9.632663999999998 -mux2,sky90,16,5279,0.20207381625307821,119.560002,32.354,41.42513233188104 -priorityencoder,tsmc28,8,30692,0.032285780268473875,8.19,32.782,1.6627176838264048 -comparator,tsmc28,8,1000,0.13802199999999998,14.994,49.297,0.47617589999999993 -mult,sky90,8,1500,0.7480066666666666,2434.320046,1720.0,1887.9688266666665 -flop,tsmc28,64,40000,0.048892000000000005,120.959999,630.0,426.02044200000006 -comparator,sky90,128,3031,0.35562111745298586,2876.300054,1050.0,407.1861794836688 -mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1455.3214569227544 -csa,tsmc28,32,10000,0.067577,68.543999,229.117,19.462176 -mult,tsmc28,16,5000,0.257159,2071.818,15400.0,2270.9711290000005 -mux8,sky90,16,3000,0.33252133333333334,472.360009,151.954,94.73532786666667 -priorityencoder,sky90,32,5062,0.20628337534571314,382.200007,257.609,53.83996096523113 
-csa,tsmc28,64,23865,0.04077636748376283,395.135998,3790.0,166.5306848036874 -priorityencoder,tsmc28,128,12607,0.08715601213611485,290.304001,892.551,41.747729813199015 -mult,sky90,16,3000,1.0038923333333334,7911.540143,5600.0,21328.696514 -mux8,tsmc28,32,10000,0.099791,141.245999,596.543,33.92894 -mux2d,sky90,1,40000,0.075659,19.6,18.562,18.233819 -mult,sky90,8,1417,0.7728093020465773,2220.68004,1480.0,1687.042706367678 -priorityencoder,sky90,8,8163,0.12799298137939483,56.840001,37.422,12.197731125456325 -comparator,tsmc28,32,11888,0.08411643876177657,145.782,659.808,36.33830154508748 -csa,tsmc28,64,25854,0.040894734431809396,403.199997,3790.0,181.73619981496094 -mux8,tsmc28,64,10089,0.09884385112498761,321.551997,1330.0,75.71438996174051 -mux2,sky90,128,3760,0.26588844680851065,969.220019,329.927,334.75355453191486 -mult,tsmc28,64,20000,0.41826399999999997,18170.838023,126000.0,173285.52040799998 -mux2,sky90,8,5000,0.19934600000000002,59.780001,23.364,21.648975600000004 -comparator,sky90,64,3709,0.32597545133459155,1349.460026,456.898,218.72952784551094 -mux8,tsmc28,16,12256,0.08154268929503918,121.841998,521.624,25.93057519582246 -mux8,sky90,64,2879,0.3471248273706148,2081.520039,830.635,451.2622755817992 -add,sky90,16,2868,0.34958903486750353,629.160012,327.984,289.4597208702929 -comparator,sky90,16,4098,0.2459944738897023,420.420007,284.157,87.82002717862372 -flop,sky90,8,8200,0.11434421951219512,133.279999,64.8145,181.52716568658536 -mux2,sky90,16,4445,0.2164018785151856,114.660002,40.216,41.11635691788527 -priorityencoder,sky90,128,9142,0.2984832548676438,1775.760034,746.946,438.7703846554364 -mult,sky90,16,957,1.044805079414838,6996.220129,4080.0,5647.171454237199 -csa,sky90,128,6350,0.16536131496062992,5822.180113,3500.0,2827.513124511811 -add,sky90,16,2407,0.4150579231408392,465.500009,129.875,167.2683430257582 -shiftleft,tsmc28,16,12538,0.08381253708725475,138.851999,704.964,38.63757959722444 
-mult,tsmc28,64,2439,0.41399910004100043,18624.186033,131000.0,21368.563548616235 -mux8,sky90,64,3329,0.34501250765995795,2303.000044,776.23,493.0228734460799 -shiftleft,sky90,32,2796,0.38572179113018595,1639.540029,984.698,465.18048010300424 -csa,sky90,128,6228,0.16536218946692358,5394.900105,3210.0,2530.2068610333977 -flop,tsmc28,8,5000,0.04889200000000002,15.12,78.6345,6.648334160000004 -mux8d,tsmc28,1,30000,0.04558633333333333,20.664,171.151,9.062563066666666 -priorityencoder,tsmc28,8,35000,0.03258242857142857,10.206,40.86,2.2449293285714282 -add,sky90,16,1000,0.994731,318.500006,55.774,95.494176 -mux2d,sky90,1,13746,0.07565843590862796,19.6,18.562,6.272084336825259 -priorityencoder,sky90,8,1000,0.476027,34.300001,2.489,2.036919533 -mux8,sky90,1,5657,0.174947140710624,70.560001,47.08,12.17632099345943 -mux2,sky90,128,4421,0.2594991689662972,1281.840022,841.866,534.5682880705722 -flop,sky90,16,8571,0.11434450029168125,266.5599975,129.629,379.3950519677984 -priorityencoder,sky90,64,4347,0.2409077083045779,890.820015,461.143,96.84489873844032 -csa,sky90,64,5984,0.16522529946524064,2469.600048,1440.0,1354.3517797165773 -mult,sky90,64,1500,1.5638476666666667,98311.641652,63200.0,260978.0263086667 -mux8,sky90,16,3698,0.2855274413196322,599.760012,190.243,128.20182115251487 -add,tsmc28,128,5000,0.199634,491.274002,2320.0,184.461816 -priorityencoder,sky90,8,7675,0.12508715960912054,50.960001,24.761,10.257147087947885 -mux2,tsmc28,8,20000,0.049887,8.064,44.071,2.5492257 -mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24931.79089954522 -mux8,sky90,1,7350,0.1873944217687075,78.400002,53.42,26.141521836734697 -comparator,tsmc28,8,45000,0.05746522222222222,38.682,206.667,28.10049366666667 -comparator,tsmc28,16,13423,0.07431099426357744,49.14,209.169,12.65516232308724 -add,sky90,32,2472,0.4077847443365696,1371.020025,606.918,519.1099795404531 -priorityencoder,tsmc28,64,25000,0.073285,142.632001,441.682,34.517235 
-mux8,tsmc28,32,10000,0.099791,141.245999,596.543,33.92894 -comparator,sky90,8,4829,0.2066692116380203,198.940004,136.459,48.56726473493477 -csa,sky90,8,5984,0.16671429946524063,306.740006,227.761,164.04687067379678 -shiftleft,tsmc28,64,8766,0.12003511613050423,1171.043997,6250.0,340.53962446224045 -add,tsmc28,8,13505,0.07299464938911515,32.76,175.834,12.795962037911886 -mux4,tsmc28,8,5000,0.08076000000000001,15.12,51.529,1.6515420000000003 -flop,tsmc28,128,10000,0.048892000000000005,241.919998,1260.0,212.997998 -csa,tsmc28,32,22871,0.04244549263259149,124.992002,962.249,53.52376620969787 -flop,sky90,64,5000,0.11434400000000002,1066.23999,520.0,885.2512480000001 -flop,sky90,32,8746,0.11434398307797851,533.119995,259.258,774.2802814125316 -mux2,sky90,128,4504,0.25949886678507994,1281.840022,841.866,544.4286225150977 -shiftleft,tsmc28,32,15000,0.10560866666666666,343.853999,1770.0,159.15226066666665 -add,tsmc28,32,8848,0.11296289150090416,194.040002,998.32,81.89809633815553 -priorityencoder,tsmc28,64,15000,0.07253866666666667,152.334,494.057,21.7616 -priorityencoder,sky90,16,6001,0.16939489351774706,166.600002,113.104,29.30531657857024 -inv,tsmc28,1,40000,0.014172,0.252,1.005,0.058601219999999996 -priorityencoder,sky90,16,4490,0.22015614922048998,104.860002,29.773,13.385493872605792 -csa,sky90,32,6472,0.16536174289245983,1499.400029,862.282,678.3138693448702 -add,sky90,8,4000,0.282379,275.380005,166.741,155.30845000000002 -mux8,tsmc28,16,1000,0.13324100000000005,55.314,209.842,1.3044293900000006 -mux2,sky90,1,12770,0.08300853563038371,13.72,12.3,4.142125927956147 -mult,sky90,128,514,1.9488052918287937,289359.703832,109000.0,263474.5778446693 -mux2,sky90,128,3000,0.31914533333333334,891.800017,294.708,326.80482133333334 -mux4,sky90,8,3000,0.31358933333333333,116.620002,10.431,22.7665856 -csa,tsmc28,32,10000,0.067577,68.543999,229.117,19.462176 -mult,tsmc28,16,3975,0.2567893270440252,2099.79,15900.0,1793.6734494025156 
-mux2,sky90,16,5729,0.20207353237912376,119.560002,32.354,45.04219036730668 -mult,tsmc28,16,10000,0.262212,1697.220001,12500.0,3959.4012 -mux8,tsmc28,64,11220,0.092287559714795,548.855998,2970.0,129.66402139928698 -priorityencoder,tsmc28,128,5000,0.19975,151.956,436.218,21.553025 -comparator,tsmc28,16,1000,0.16839499999999996,29.736,97.953,1.1501378499999997 -priorityencoder,sky90,32,3021,0.3288262197947699,283.220005,52.712,21.603882640516385 -shiftleft,sky90,32,3000,0.3745153333333333,2057.020033,1600.0,674.1275999999999 -shiftleft,sky90,32,2633,0.381174910748196,1713.040029,1170.0,476.84981334599314 -mux2,sky90,16,4908,0.20208898125509372,117.600002,32.354,38.619204317848414 -mux4,sky90,16,4840,0.2558435702479339,359.660007,120.182,91.3361545785124 -add,sky90,16,10,1.9208679999999987,221.479998,28.13,1.513643983999999 -mux2,tsmc28,64,20864,0.052896447852760736,84.167999,422.792,26.077948791411043 -mux4,tsmc28,128,13089,0.07639603056001222,296.603997,1440.0,94.50188980273512 -mux4,tsmc28,64,10,0.17948199999999304,106.847999,391.422,0.049716513999998074 -mux2,sky90,16,4815,0.20207331983385254,119.560002,32.354,37.76750347694705 -csa,sky90,16,1000,0.264181,235.199997,27.622,22.349712599999997 -mux8,tsmc28,128,5000,0.19406900000000002,422.351997,1670.0,97.81077600000002 -flop,tsmc28,32,30000,0.048891333333333335,60.4799995,314.581,159.76954363333334 -mux4d,sky90,1,10284,0.10307742862699339,51.940001,49.18,14.461763236367169 -mux2,tsmc28,8,10000,0.08170100000000001,7.56,38.026,1.9689941000000004 -mux2d,tsmc28,1,70000,0.018931714285714284,3.276,26.574,1.2286682571428569 -flop,sky90,8,9095,0.11434452226498075,133.279999,64.8145,201.35498648251786 -mux8d,tsmc28,1,23253,0.04558620362963919,20.664,171.151,6.992923636786652 -shiftleft,sky90,16,5000,0.303441,717.36001,509.475,327.412839 -mux4,sky90,1,7530,0.12838312483399733,28.420001,21.808,6.1238750545816725 -mux8,tsmc28,128,10891,0.0973569330639978,815.093988,3810.0,190.1380902739877 
-comparator,tsmc28,64,9797,0.10206606287639072,213.191999,805.93,51.13509750107175 -flop,sky90,128,9270,0.11434386515641856,2132.4799805,1035.0,3282.1834773824166 -mux8,sky90,1,6048,0.16965991534391534,76.440001,61.295,13.844249092063492 -priorityencoder,sky90,32,2900,0.34199358620689657,279.300005,53.647,20.93000747586207 -priorityencoder,sky90,32,4654,0.21479492995272884,352.800007,178.57,40.57476226807048 -mux8,tsmc28,128,1000,0.438596,411.137997,1430.0,46.7981932 -mux8,sky90,1,4909,0.20284647606437156,73.500001,36.03,15.497470771317985 -mult,tsmc28,8,4779,0.20923679681941829,367.416001,2580.0,255.47812891650972 -mult,sky90,32,500,1.995832,14478.520059,2290.0,6234.979168 -flop,sky90,32,8856,0.1143437958446251,533.119995,259.258,783.998236208672 -mux4d,tsmc28,1,34593,0.029485582458878962,6.426,53.388,1.9961739324661056 -add,tsmc28,128,20000,0.134007,983.934008,4860.0,987.7655969999998 -priorityencoder,sky90,32,4460,0.22416824663677132,317.520006,137.615,34.81332870269058 -add,tsmc28,16,11068,0.09016456017347307,73.584,369.397,26.778874371521507 -shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,566.543120162039 -mux8,sky90,128,2623,0.3811398516965307,3951.360076,1270.0,817.1638420373619 -csa,sky90,16,10,0.2641830000000027,235.199997,27.622,0.2234988180000023 -add,tsmc28,64,9467,0.12393808344776593,417.564004,2000.0,179.09053058202176 -shiftleft,sky90,16,3153,0.3167542619727244,546.840011,299.663,126.70170478908976 -priorityencoder,sky90,64,2874,0.34545211203897,586.040011,117.635,39.96880936290883 -add,tsmc28,128,7210,0.13869425520110956,868.140006,4090.0,331.3405756754508 -flop,sky90,64,7696,0.11434462993762995,1066.23999,520.0,1362.5306103367984 -comparator,sky90,16,4516,0.24525989813994686,430.220006,338.014,114.29111253321524 -mux2,tsmc28,8,28449,0.039864620408450215,11.466,61.225,3.5838293747196746 -mux8,sky90,1,5796,0.1700967812284334,70.560001,49.874,12.400055351552794 
-mux8d,tsmc28,1,21936,0.045586162654996355,20.664,171.151,6.614552201239972 -priorityencoder,sky90,128,2944,0.3395509130434783,1468.040026,399.523,92.01829743478262 -mux8,tsmc28,8,14530,0.07174612456985546,114.281999,532.94,25.900350969717824 -mux2,tsmc28,128,17271,0.05864352689479475,177.785999,958.338,51.3717295598402 -mux8,tsmc28,128,10037,0.09948936395337252,591.695996,2690.0,153.5120885800538 -mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88845.84517534176 -mult,tsmc28,32,2973,0.3363555785401951,5141.430011,36900.0,5416.333881232761 -mux4,sky90,16,4625,0.2543802162162162,329.280006,134.438,87.50679437837837 -priorityencoder,sky90,32,3142,0.3180066187141948,284.200006,48.073,20.098018302737113 -mux8,sky90,8,3796,0.27002519494204424,316.540006,115.77,62.969875460484715 -mult,tsmc28,16,20000,0.256602,1987.019999,14900.0,8690.853138 -flop,tsmc28,32,5000,0.04889200000000002,60.4799995,314.5805,26.641250800000012 -csa,tsmc28,16,26351,0.04022622393837046,104.831999,978.516,46.340609977002785 -mux4,sky90,16,4392,0.22421770309653916,359.659999,419.855,99.10422476867029 -priorityencoder,sky90,64,2763,0.3597884433586681,603.680012,124.319,43.42646511339124 -flop,tsmc28,128,19872,0.04889206119162641,241.919998,1260.0,423.234127705314 -mux8,tsmc28,64,10733,0.09302259536010436,298.997999,1480.0,84.09242620553435 -mux2,sky90,32,5148,0.21904019425019425,237.160005,57.793,86.52087672882674 -mux4,sky90,1,8157,0.12749309096481548,28.420001,25.098,6.74438451203874 -mux2,tsmc28,128,16594,0.0601057455706882,184.968,1150.0,58.603101931421 -csa,sky90,8,5740,0.16671402787456446,290.080006,207.654,143.04063591637635 -mux2,sky90,32,4851,0.20550406328592044,369.460007,247.031,134.19415332570605 -comparator,tsmc28,8,10000,0.09890600000000001,19.782,91.167,4.8760658 -mux8,tsmc28,32,1000,0.19088799999999995,105.713999,360.715,4.428601599999999 -priorityencoder,sky90,8,7994,0.12495882036527395,60.760001,44.346,13.42057730723042 
-mult,sky90,32,2500,1.284115,29053.080518,21300.0,97288.40474499999 -mux8,sky90,1,6426,0.1691038026766262,74.480001,55.304,14.120167523498285 -shiftleft,tsmc28,16,10000,0.09990500000000001,65.772,298.445,20.390610500000005 -add,sky90,32,2000,0.498008,1061.340021,292.588,363.047832 -shiftleft,tsmc28,8,10000,0.09206,15.876,52.724,4.78712 -mux2,tsmc28,16,20299,0.0524705105177595,15.372,84.373,5.367733225966797 -mux2,tsmc28,64,10,0.18685899999999833,56.826002,290.226,0.03890404379999965 -mux2,tsmc28,64,18160,0.054194079295154185,83.411999,419.592,23.03248370044053 -flop,sky90,64,13561,0.11434387456677236,1066.23999,520.0,2400.821162341236 -comparator,sky90,32,3890,0.2804184087403599,745.780014,371.064,133.75958096915167 -csa,sky90,64,5740,0.16671402787456446,2320.640045,1650.0,1142.1578049686414 -mux4,sky90,64,3350,0.29838746268656713,1108.380021,403.862,310.62134865671635 -mux8,sky90,32,3000,0.3331443333333333,1255.380002,604.182,313.8219619999999 -mult,tsmc28,8,4883,0.2047711359819783,389.844002,2700.0,262.72136746487814 -mux8,tsmc28,8,10000,0.097916,30.114,107.344,6.452664400000001 -comparator,tsmc28,32,1000,0.21222300000000005,64.26,218.101,3.1833450000000005 -mux8,sky90,64,3205,0.33075848049921996,2233.420042,836.819,517.30626350078 -flop,sky90,16,8036,0.11434401991040319,266.5599975,129.629,355.7242459412643 -shiftleft,sky90,16,3288,0.30390325304136256,618.380008,475.055,164.7155631484185 -csa,tsmc28,128,24860,0.04049226146419952,838.655998,7720.0,349.56969322043454 -mux8,sky90,32,3142,0.31754961871419474,1303.399993,667.502,355.0204737224697 -priorityencoder,tsmc28,64,20000,0.073634,136.962001,425.372,27.539116000000003 -comparator,tsmc28,64,7826,0.1274471975466394,167.075999,585.912,40.52820881983133 -shiftleft,sky90,128,2060,0.5385058932038835,10222.380152,7230.0,3480.9020936699035 -mux4,tsmc28,32,15775,0.06679444215530904,75.221999,360.596,24.98112136608558 -mux4,sky90,64,5000,0.291781,1526.840029,832.078,509.74140700000004 
-mux8,tsmc28,64,11162,0.09114767926894822,613.241996,3300.0,129.88544295825122 -mux2,sky90,16,4723,0.21094683273343212,117.600002,46.727,43.83475184200719 -csa,sky90,128,6594,0.1653620178950561,5895.680115,3510.0,2778.908710726418 -shiftleft,tsmc28,64,8104,0.12339285389930899,741.131999,3530.0,211.61874443731492 -mux2,sky90,8,5830,0.18835058662092624,64.680001,21.541,21.208276053516297 -mux4,tsmc28,16,20000,0.058395,59.597999,248.099,16.876155 -mux2d,tsmc28,1,20000,0.029229,0.882,4.502,0.15462141 -mux2,sky90,32,5000,0.19989700000000002,374.360008,259.372,136.72954800000002 -mux2,sky90,128,3680,0.27093913043478257,954.520018,237.607,305.89027826086954 -flop,tsmc28,8,5000,0.04889200000000002,15.12,78.6345,6.648334160000004 -mux8,sky90,64,2999,0.33345148149383125,2194.220042,693.108,451.82675742414136 -add,sky90,32,2678,0.4015229947722181,1551.340028,808.275,750.0449542345034 -mux4d,tsmc28,1,36628,0.02948551796439882,6.426,53.388,2.114111638047395 -mult,tsmc28,64,2536,0.41319876656151416,18429.01203,129000.0,22167.700627258677 -mux4,sky90,8,4370,0.2277269519450801,120.540002,34.353,33.52140732631579 -mux2,tsmc28,8,30819,0.03374451614263928,16.758,114.743,5.675827615191928 -csa,tsmc28,64,26848,0.04022672228843861,419.327997,3910.0,188.22083358760426 -csa,tsmc28,64,30000,0.040226333333333336,403.199997,3790.0,204.18886799999999 -mult,tsmc28,16,3585,0.2789380278940028,1195.866006,8100.0,1095.947511595537 -priorityencoder,sky90,8,7824,0.1262658609406953,49.980001,32.378,10.719971593865031 -mux2,sky90,1,10,0.19187900000000013,6.86,1.19,0.003801122990000003 -priorityencoder,sky90,128,3355,0.29794259314456034,1552.320028,520.812,115.30378354694486 -mux4,tsmc28,128,15000,0.07592766666666667,400.175992,1530.0,112.60072966666668 -flop,sky90,64,12431,0.11434405116241654,1066.23999,520.0,2200.779952723031 -mult,sky90,8,5000,0.7641009999999999,2352.000044,1740.0,6556.750680999999 -mux2,sky90,64,3894,0.25622634155110424,475.300009,167.9,170.90296981458656 
-flop,sky90,32,7696,0.11434462993762995,533.119995,259.258,681.2653051683992 -shiftleft,sky90,128,1500,0.6660436666666666,5745.740111,1150.0,1234.1789143333333 -add,sky90,8,5000,0.27294,335.160007,239.674,225.72138 -mux4,sky90,64,3847,0.30364581258123213,1512.140028,861.823,480.9749671286717 -csa,tsmc28,16,15000,0.06424366666666667,34.271999,114.858,14.082211733333335 -flop,tsmc28,128,30000,0.048891333333333335,241.919998,1260.0,638.985281 -mux2,sky90,16,3000,0.3169143333333333,117.600002,20.699,35.62117106666666 -mux2,tsmc28,32,18261,0.05585551360823613,32.130001,171.146,10.098676860369093 -flop,tsmc28,64,19458,0.048891743344639735,120.959999,630.0,207.2276541662555 -flop,sky90,32,8692,0.11434432029452371,533.119995,259.258,769.4229312618501 -mux2,sky90,16,4000,0.237892,115.640002,38.479,40.1799588 -flop,sky90,64,9270,0.11434386515641856,1066.23999,520.0,1641.1774965900759 -mux2,tsmc28,64,19319,0.05253551358765982,123.858,794.333,37.56289221517678 -mult,tsmc28,8,5506,0.19457805085361424,617.652002,4640.0,439.7463949291681 -mux8,sky90,32,3330,0.3215693003003003,1068.20002,336.387,237.63971292192196 -mux2,sky90,128,4320,0.2587084814814815,1099.560018,533.373,433.33670648148154 -csa,tsmc28,16,23368,0.04244556384799726,62.496001,481.11,27.41983424580623 -shiftleft,sky90,32,10,1.1897809999999964,882.980017,61.087,1.7715839089999947 -mux2,sky90,16,5076,0.20207351615445232,119.560002,32.354,39.869104737273446 -mux2,tsmc28,64,17773,0.055664121251336296,83.033999,417.316,23.100610319304565 -mux8,tsmc28,1,16000,0.062475,6.3,29.167,1.5312622500000002 -shiftleft,tsmc28,8,20000,0.065862,51.282,375.961,23.183424000000002 -flop,tsmc28,64,5000,0.04889200000000002,120.959999,630.0,53.25805560000002 -priorityencoder,sky90,64,3364,0.29587816052318666,678.160013,160.526,44.64801442294887 -shiftleft,tsmc28,32,9979,0.1024404419280489,306.179999,1540.0,91.88907640945986 -priorityencoder,tsmc28,64,12683,0.07883669896712135,101.556,290.415,12.716359543396672 
-mux2d,tsmc28,1,10000,0.029229000000000005,0.882,4.502,0.07716456000000001 -flop,sky90,8,9270,0.11434386515641856,133.279999,64.8145,205.21293479622437 -shiftleft,tsmc28,32,9783,0.10502413349688235,371.825999,1780.0,105.75930243136052 -mux8,sky90,16,3489,0.28785507595299514,649.740012,329.741,145.65466843221554 -mux2,sky90,16,3760,0.2638374468085106,113.680002,32.455,38.573034723404255 -priorityencoder,sky90,8,8314,0.12521204738994468,62.720001,47.365,14.524597497233582 -mux2d,sky90,1,12424,0.07841937540244688,18.62,16.576,5.77166602962009 -mux4,tsmc28,16,10000,0.09861500000000001,28.224,100.845,8.569643500000002 -mux8,sky90,64,2835,0.35519268606701937,2060.940039,675.297,423.7448744779541 -mux2,tsmc28,16,19916,0.0524708857200241,15.372,84.373,5.273324014862422 -flop,sky90,8,14692,0.11434425265450586,133.279999,64.8145,325.25222667574195 -mux8,tsmc28,128,10000,0.099998,598.877995,2750.0,147.397052 -priorityencoder,tsmc28,8,28850,0.034591045060658576,6.174,23.212,1.3386734438474868 -mux4,sky90,64,3403,0.29347936027034965,868.280017,245.808,251.80529111195997 -csa,sky90,32,6228,0.16536218946692358,1349.460026,801.907,633.0064612793835 -flop,sky90,128,7696,0.11434462993762995,2132.4799805,1035.0,2724.718186783784 -mux2,tsmc28,64,18932,0.05313362117050496,84.419999,442.406,24.388332117261776 -mult,sky90,16,5000,1.024199,7447.020138,4910.0,32946.433432 -mux4,tsmc28,1,27830,0.038088446999640674,5.292,41.928,1.813010077182896 -comparator,sky90,64,3006,0.38073099733865606,1235.780024,216.793,164.47579085029943 -mux4,sky90,64,3210,0.3084074797507788,877.100017,251.363,248.5764286791277 -mux2d,tsmc28,1,50870,0.01893195164143896,3.276,26.574,0.8935881174759189 -flop,tsmc28,32,19872,0.04889206119162641,60.4799995,314.5785,105.81953264009664 -add,sky90,16,2993,0.3504499301703976,648.760012,358.879,284.91579322853323 -priorityencoder,sky90,16,5521,0.18079760749864154,119.560002,54.0,16.307944196377466 -csa,sky90,32,6350,0.16398131496062993,1568.000031,983.19,753.9860861889763 
-flop,sky90,128,9095,0.11434452226498075,2132.4799805,1035.0,3220.170436026388 -mux2,sky90,8,3000,0.3202723333333333,60.760001,10.548,16.750243033333334 -comparator,tsmc28,64,10862,0.09205807659731172,291.312,1240.0,69.41178975437303 -mux8,sky90,1,5763,0.17009673572791947,70.560001,49.874,12.31500366670137 -mult,tsmc28,32,3277,0.33162315593530667,6157.493997,45600.0,6628.815263990846 -comparator,tsmc28,8,16358,0.0611251677466683,30.492,141.092,8.28857274644822 -mux8,sky90,1,7050,0.1691039716312057,75.460001,55.309,15.845042141843974 -priorityencoder,tsmc28,8,5000,0.057811,4.284,14.558,0.24858730000000004 -mux4,tsmc28,64,15075,0.07022099170812604,145.277999,710.673,50.13778807960199 -flop,tsmc28,16,10000,0.048892000000000005,30.24,157.29,26.641250800000005 -priorityencoder,sky90,128,3548,0.2994219289740699,1660.120031,616.349,131.1468048906426 -comparator,tsmc28,32,5000,0.198683,64.26,222.007,16.1131913 -mux8,tsmc28,64,10560,0.09463496969696969,291.437999,1400.0,81.4807089090909 -add,tsmc28,16,10616,0.09410143782969103,74.340001,376.096,30.018358667671443 -flop,sky90,32,8036,0.11434401991040319,533.119995,259.258,711.3913198725734 -mux4,tsmc28,8,16438,0.060637651417447376,19.404,85.403,5.90004348291763 -mux4,tsmc28,16,15000,0.06665566666666667,31.878,126.281,9.8583731 -comparator,sky90,128,1500,0.6666466666666666,2810.640055,438.161,263.99208 -mux4,sky90,16,10,0.5659639999999939,219.520004,14.317,0.27958621599999706 -mux2,tsmc28,16,17534,0.05470405201323143,14.994,80.805,4.764722930352458 -mux8,tsmc28,32,5000,0.190888,105.713999,390.932,24.128243200000004 -flop,sky90,8,9445,0.11434412493382742,133.279999,64.8145,209.07251523525676 -mult,sky90,16,2500,1.009413,8068.34015,5780.0,17802.007668 -priorityencoder,sky90,16,5753,0.16977035355466713,136.220003,77.243,21.255248265044322 -add,sky90,32,1888,0.5292220169491526,1058.400021,267.445,333.4098706779661 -comparator,sky90,32,10,1.0165939999999978,495.88001,66.41,0.43103585599999905 
-shiftleft,tsmc28,64,5000,0.199962,276.947999,930.589,89.9829 -priorityencoder,sky90,128,3365,0.29715682020802375,1659.140031,625.73,124.50870766716194 -shiftleft,sky90,128,1984,0.5263342580645162,9846.060141,7380.0,3251.693046322581 -flop,tsmc28,8,20000,0.048892000000000005,15.12,78.634,26.641250800000005 -mux8,sky90,1,6552,0.16910415262515263,74.480001,55.304,14.39076338840049 -add,sky90,16,2000,0.49998,405.720008,96.508,142.99428 -mux8,tsmc28,64,10000,0.099779,286.649999,1380.0,82.118117 -mux8,sky90,128,1000,0.985713,3202.640062,282.825,555.942132 -flop,tsmc28,64,22356,0.04889172105922347,120.959999,630.0,238.1026815584183 -mux4,tsmc28,16,17467,0.060428815824125494,44.603999,221.905,14.74463106108662 -csa,sky90,64,5617,0.17789797739006588,2320.640045,1650.0,1191.9164485134415 -comparator,sky90,8,3000,0.33027033333333333,131.320002,24.344,23.151950366666668 -add,tsmc28,16,11746,0.08525836523071684,94.878001,497.642,36.23480522305465 -mux4d,sky90,1,50000,0.103077,51.940001,49.18,70.298514 -csa,tsmc28,16,22871,0.04244549263259149,62.496001,481.11,26.82555134379782 -flop,sky90,64,9095,0.11434452226498075,1066.23999,520.0,1610.199562535459 -mult,sky90,32,250,3.969283,13353.480123,1530.0,4985.419448 -comparator,sky90,32,3376,0.29603753080568723,675.220013,193.657,92.95578467298579 -mult,sky90,8,1000,0.999357,1330.840019,338.111,571.632204 -csa,tsmc28,64,10,0.06758100000000411,137.087997,458.434,0.038994237000002374 -comparator,sky90,32,3743,0.2740003753673524,769.300014,392.725,131.24617980096178 -add,tsmc28,128,7961,0.13527036025624922,954.072009,4690.0,386.6026896123603 -mult,sky90,8,1230,0.8126231300813008,1785.560033,804.919,995.4633343495935 -mux8,tsmc28,64,11880,0.08855108417508419,558.053994,2700.0,113.61104099663301 -mux2,tsmc28,16,17915,0.05470414596706671,14.994,80.805,4.874139405665644 -mux4,sky90,16,4452,0.22313914914645103,392.0,398.313,103.09028690566036 -priorityencoder,sky90,128,2884,0.34642463800277395,1455.300028,364.253,93.1882276227462 
-priorityencoder,sky90,128,3492,0.31244084306987396,1487.640028,433.096,105.6050049576174 -add,sky90,16,3606,0.3287555851358846,780.080015,633.143,448.42261812534656 -mux4,sky90,64,1000,0.884302,831.040016,55.075,178.9827248 -mux8,tsmc28,16,11756,0.08484694658046955,83.664,357.292,21.466277484858796 -priorityencoder,tsmc28,128,10739,0.09311553990129434,232.218,650.803,28.12089305019089 -comparator,tsmc28,128,8797,0.11367111651699442,437.724,1630.0,98.55285802023417 -mux4,sky90,32,4107,0.2508117299732165,578.200011,343.898,131.6761582359387 -mux4,sky90,1,8000,0.127493,28.420001,25.098,6.616886699999999 -mux4,tsmc28,1,28355,0.038088148651031564,5.292,41.928,1.8434663947099277 -comparator,tsmc28,8,7699,0.12846599831146902,14.994,50.165,3.64843435204572 -mux2,tsmc28,128,15000,0.06636066666666667,145.026001,933.064,48.90781133333333 -mux2,sky90,32,1000,0.98053,222.460004,69.189,99.13158299999999 -comparator,sky90,128,1000,0.971257,1997.240039,243.652,153.1672289 -flop,sky90,128,3000,0.11434433333333333,2132.4799805,1035.0,1062.1445123333335 -flop,tsmc28,8,1000,0.04889200000000005,15.12,78.6345,1.3321114320000014 -comparator,sky90,16,4265,0.24596558851113717,414.540007,269.17,89.03954304103165 -flop,sky90,64,9445,0.11434412493382742,1066.23999,520.0,1672.168483032292 -mux4,sky90,8,4368,0.22772772893772894,120.540002,34.353,33.521521699633695 -csa,sky90,8,5617,0.17789797739006588,290.080006,207.164,149.07850505287524 -csa,tsmc28,32,26351,0.04022622393837046,209.663999,1960.0,92.39963638643694 -csa,tsmc28,16,10000,0.067577,34.271999,114.459,9.737845700000001 -mux2,sky90,8,5390,0.18835075695732836,64.680001,21.541,19.607313799257884 -mux2,tsmc28,1,49852,0.019658375752226592,2.142,15.112,0.5897512725667977 -flop,sky90,32,8396,0.11434433539780849,533.119995,259.258,743.2381800857552 -csa,sky90,8,3000,0.2641803333333333,117.599998,13.811,33.47164823333333 -priorityencoder,sky90,16,6241,0.16662873225444638,185.220003,132.902,34.258867351514176 
-shiftleft,sky90,64,2068,0.4833739941972921,3315.340059,1760.0,854.1218477466151 -priorityencoder,tsmc28,32,16453,0.060740189205615996,62.118,205.801,9.439025402552724 -mux4,sky90,16,4181,0.2399792303276728,234.220005,76.845,67.02619903051901 -mux4,sky90,1,50000,0.127493,28.420001,25.098,41.435224999999996 -add,sky90,16,2708,0.36895421861152145,534.10001,198.833,217.3140347621861 -comparator,tsmc28,64,10,0.28591199999999617,134.946,464.047,0.08405812799999887 -mux4,tsmc28,64,5000,0.19659000000000001,107.099999,386.787,35.268246000000005 -flop,tsmc28,8,10000,0.048892000000000005,15.12,78.6345,13.321114320000003 -mux8,tsmc28,32,12426,0.08657342040882023,248.723997,1220.0,58.09076509431838 -add,sky90,16,3180,0.3386494088050314,740.880014,486.695,376.578142591195 -priorityencoder,sky90,64,3929,0.2543516889793841,756.560014,316.035,63.079218866887246 -priorityencoder,tsmc28,8,31306,0.03191275857663067,8.316,34.836,1.713715135565067 -flop,sky90,16,8692,0.11434432029452371,266.5599975,129.629,384.71146563092503 -shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,195.51397310283156 -mux2,tsmc28,128,16933,0.05955828063544558,179.045998,922.967,49.55248948869073 -csa,tsmc28,8,24860,0.04049226146419952,52.416,482.462,21.906313452131936 -flop,tsmc28,8,21942,0.04889169692826543,15.12,78.635,29.22256725402425 -comparator,sky90,128,2915,0.3484451732418525,2890.020055,997.522,387.12258747169807 -mux2,sky90,32,4653,0.2108891085321298,368.480007,245.044,132.01658194111326 -csa,sky90,16,6228,0.16536218946692358,675.220013,403.525,317.16467939755944 -mux8,sky90,128,2680,0.374426328358209,4575.620072,1810.0,983.9923909253732 -mux2,sky90,128,4254,0.25589987259050306,1290.660024,671.3,456.0135729562765 -mux8,sky90,16,1000,0.772607,430.220008,28.957,52.7690581 -shiftleft,tsmc28,8,15183,0.06578013640255549,48.384,333.876,15.51753417736284 -mux4,tsmc28,1,50000,0.038088,5.292,41.928,3.2565239999999998 -mult,sky90,16,937,1.0669988591248665,6421.94011,3210.0,5135.465508967983 
-mult,sky90,16,1000,1.00493,7795.900143,5200.0,6776.242990000001 -mux2,sky90,1,11324,0.08482301836806781,11.76,9.281,3.579531375132462 -priorityencoder,tsmc28,8,15000,0.057811666666666664,4.284,14.558,0.7469267333333334 -add,sky90,32,2523,0.41871754736424893,1592.500031,853.025,771.6964397923107 -priorityencoder,sky90,16,4898,0.20367196529195591,111.720002,34.613,12.322153900163332 -priorityencoder,tsmc28,32,15446,0.06469868069403081,48.51,146.502,7.485637356299365 -mux4,sky90,64,3629,0.27551900496004406,1088.78002,330.116,287.09080316836594 -comparator,tsmc28,32,11393,0.08771019406653209,131.922,513.89,32.10193102835075 -csa,sky90,16,6350,0.16398131496062993,784.000015,491.595,377.8129496692914 -comparator,sky90,16,3847,0.25994081258123214,372.400007,183.367,73.5632499604887 -mux8,tsmc28,128,10000,0.099998,598.877995,2750.0,147.397052 -mux2,tsmc28,32,5000,0.171662,28.728001,146.078,8.411438 -comparator,sky90,64,2884,0.34646663800277394,1513.120029,464.935,256.7317787600555 -mux4,tsmc28,128,13890,0.07617124046076314,435.833993,2070.0,119.2079913210943 -comparator,tsmc28,8,18446,0.058025295348585054,37.548,189.059,10.28788486530413 -comparator,sky90,32,3449,0.2893411127863149,699.720014,199.627,88.82772162539868 -mux4,tsmc28,1,10,0.05644999999999811,2.268,8.147,0.0002737824999999909 -mux2,sky90,64,4443,0.24442314877335133,564.480008,226.061,208.98179220121537 -mux8,sky90,32,3267,0.3215692151821243,1068.20002,336.387,233.4592502222222 -mux8,tsmc28,1,16960,0.0601102641509434,9.072,57.908,2.849226520754717 -comparator,sky90,32,3816,0.27776150733752625,694.820013,315.258,109.9935569056604 -mux8,tsmc28,8,12885,0.07750962359332557,44.856,215.13,11.90547818393481 -flop,sky90,16,8221,0.1143447031991242,266.5599975,129.629,363.90201793121275 -mux8,sky90,64,2819,0.35473072188719407,2140.320042,707.164,435.25459575558716 -flop,tsmc28,16,30000,0.048891333333333335,30.24,157.29,79.89821693333333 -add,sky90,8,3254,0.3057880749846343,222.460004,123.164,102.43900511985248 
-shiftleft,sky90,32,1500,0.6646396666666666,886.900017,99.1,153.06651523333335 -mux2,sky90,8,5500,0.18835118181818183,64.680001,21.541,20.02173062727273 -mux4,tsmc28,32,10000,0.09749100000000001,56.825999,208.953,16.5929682 -mult,sky90,32,732,1.366111218579235,22404.76038,10600.0,21352.318346393444 -mux4,tsmc28,8,1000,0.07530599999999998,15.12,54.869,0.26808935999999994 -priorityencoder,sky90,8,8003,0.12495914257153568,60.760001,44.346,13.445603740697239 -mult,sky90,128,566,1.9132164522968198,301979.16406,136000.0,333867.7502245088 -mux8,sky90,8,3000,0.3275843333333333,249.900005,69.409,43.07733983333333 -mux2,sky90,64,4176,0.24312560153256704,506.66001,127.14,157.78851539463602 -mult,sky90,64,606,1.6501510165016502,76721.261154,30400.0,74771.64285972278 -comparator,sky90,8,4736,0.21119264864864865,193.060003,97.876,39.936529859459455 -mult,tsmc28,64,2292,0.4363001745200698,14297.346086,92700.0,16408.813263525306 -mux8,sky90,64,3020,0.33032882781456957,2207.940042,730.503,445.61358872185434 -priorityencoder,sky90,8,8154,0.1279921954868776,56.840001,37.422,12.197656229899433 -priorityencoder,sky90,128,3698,0.2876024413196322,1841.420035,765.651,166.23421108274744 -mux2,tsmc28,1,46800,0.021009521367521367,1.89,12.344,0.5670469817094017 -shiftleft,sky90,64,2113,0.473039766682442,3417.26006,2040.0,944.6604140648367 -priorityencoder,sky90,8,5000,0.189665,37.240001,6.744,5.9934140000000005 -flop,tsmc28,8,20286,0.04889208035098097,15.12,78.634,27.015318997934532 -mux2,sky90,64,4308,0.24187027669452182,485.100007,130.641,166.40675036583102 -mux2,tsmc28,128,10,0.31761899999999343,113.274004,578.362,0.1508690249999969 -mux4,sky90,16,4482,0.22884868094600624,436.100003,418.878,106.18578795894689 -mux8,sky90,32,2903,0.34180823665173954,1063.300021,269.795,193.8052701815363 -comparator,sky90,128,2954,0.3591750352064996,2891.000056,940.054,386.4723378821936 -comparator,tsmc28,64,10649,0.093904531035778,282.24,1180.0,66.76612156643817 
-mux4d,tsmc28,1,30000,0.032592333333333334,2.898,17.893,0.8636968333333332 -flop,tsmc28,32,15000,0.04889166666666667,60.4799995,314.5805,79.89876166666666 -flop,tsmc28,128,10000,0.048892000000000005,241.919998,1260.0,212.997998 -add,tsmc28,128,1000,0.99962,438.984003,2050.0,155.840758 -priorityencoder,sky90,16,5761,0.16976997552508244,136.220003,77.243,21.28915493084534 -mux4,sky90,64,3477,0.28514725654299683,882.980016,291.66,258.3434144279551 -add,tsmc28,128,6909,0.14471474656245478,799.470005,3620.0,303.75625303459265 -mux4,sky90,8,4560,0.2194212456140351,136.220003,45.945,38.57425497894737 -priorityencoder,sky90,8,7843,0.12626623128904757,49.980001,32.378,10.745256282697948 -mux2,sky90,32,10,0.9805759999999992,222.460004,38.896,0.8491788159999993 -priorityencoder,tsmc28,128,1000,0.998321,114.533999,290.901,15.953169580000003 -comparator,tsmc28,16,5000,0.159549,29.736,99.737,5.759718899999999 -priorityencoder,sky90,16,5292,0.1885644746787604,124.460002,53.077,18.158758911564625 -priorityencoder,sky90,64,4096,0.244021625,794.780014,364.853,72.71844425000002 -mult,sky90,128,545,1.924225385321101,300240.644153,129000.0,317031.526034734 -flop,sky90,128,9445,0.11434412493382742,2132.4799805,1035.0,3344.108277814717 -mux2,sky90,128,4160,0.2573796153846154,1173.060021,662.29,451.70122499999997 -priorityencoder,tsmc28,32,20000,0.059562000000000004,64.89,229.237,11.846881800000002 -mux2d,sky90,1,10000,0.08218500000000001,20.58,16.626,6.648766500000001 -mult,sky90,8,10,2.0858690000000024,1009.399998,188.656,6.028161410000008 -priorityencoder,tsmc28,128,5000,0.19975,151.956,436.218,21.553025 -add,tsmc28,8,14912,0.07161808583690987,40.068,231.443,16.39337984806867 -mux2,tsmc28,64,19705,0.05176354097944684,99.036,577.742,30.644016259832526 -priorityencoder,sky90,32,4776,0.20887023450586265,379.260006,246.78,50.06619521105528 -add,tsmc28,8,20000,0.07108300000000001,39.186001,228.676,21.253817 -comparator,tsmc28,128,9180,0.10891546187363835,501.102,1980.0,112.40075665359478 
-flop,tsmc28,64,21114,0.048891939945060144,120.959999,630.0,224.87847777730417 -flop,tsmc28,64,19044,0.048891976895610166,120.959999,630.0,202.82836615143873 -add,sky90,16,2608,0.3831245828220859,501.76001,166.728,192.7116651595092 -csa,sky90,16,6472,0.16536174289245983,752.640015,431.195,339.9837433868974 -add,sky90,128,2120,0.5460561132075472,6351.380123,2520.0,2517.8647380000007 -mux8,tsmc28,16,5000,0.14483200000000002,55.314,186.553,8.2988736 -shiftleft,tsmc28,32,5000,0.19764500000000002,109.116,365.184,35.912096500000004 -priorityencoder,sky90,128,3688,0.2913136746203905,1812.020032,796.412,174.20557742299354 -priorityencoder,tsmc28,16,5000,0.11892000000000001,10.962,32.132,1.1333076 -csa,sky90,32,3000,0.2641803333333333,470.399994,55.245,133.41106833333333 -comparator,sky90,64,3129,0.31954192361776923,1372.980026,508.393,204.82637303899006 -shiftleft,sky90,16,1000,0.885869,350.840007,23.053,49.608664000000005 -comparator,tsmc28,16,14851,0.07291453296074339,54.18,245.908,14.415203166338967 -flop,tsmc28,16,19872,0.04889206119162641,30.24,157.289,52.9109886215781 -add,sky90,32,2729,0.40049359142543056,1463.140027,643.256,639.988759097838 -mux8,tsmc28,128,5000,0.19406900000000002,422.351997,1670.0,97.81077600000002 -flop,sky90,16,8856,0.1143437958446251,266.5599975,129.629,392.0277040532971 -mux2,tsmc28,128,16255,0.06148853245155336,170.351999,890.282,48.5144521042756 -mux2,tsmc28,32,17187,0.05813651079304125,29.862001,160.059,9.691356349199978 -mux8,sky90,32,3393,0.32756543265546717,1058.400021,328.34,238.4676349731801 -mux2,tsmc28,64,5000,0.184446,56.826002,290.343,19.1639394 -mux4,tsmc28,8,17510,0.05699322272986865,20.79,93.425,6.463031457567105 -mult,tsmc28,64,2488,0.41489526045016073,18264.204036,127000.0,21467.925461472663 -flop,tsmc28,128,20000,0.048892000000000005,241.919998,1260.0,425.97155 -priorityencoder,sky90,8,8591,0.1252118846467233,64.680001,49.589,15.751655088557794 -csa,sky90,16,5000,0.190168,376.320007,230.423,173.62338400000002 
-comparator,sky90,32,3917,0.2725124214960429,784.000015,394.099,134.34862379754915 -mult,sky90,64,698,1.5625107564469913,90157.061468,50900.0,109641.37977988538 -mux8,tsmc28,64,11591,0.0917438331464067,303.407999,1480.0,88.80803048572169 -mux2,sky90,64,3977,0.25035681342720645,554.679996,252.121,208.29686877143578 -mux2,tsmc28,16,18677,0.05286578936660063,15.246,84.447,5.02224998982706 -comparator,tsmc28,128,9562,0.10932463166701527,537.012,2370.0,129.44036389374608 -mux2,tsmc28,1,54939,0.019658005861045887,2.142,15.112,0.6487141934145143 -priorityencoder,sky90,64,9960,0.24122660642570282,907.480017,482.887,232.78367520080323 -mux8,sky90,32,2966,0.3362054167228591,1244.600016,508.386,286.44701504787594 -priorityencoder,tsmc28,32,10000,0.09431300000000001,31.374,89.513,4.706218700000001 -mux2d,sky90,1,20000,0.075659,19.6,18.562,9.132041300000001 -mux2d,tsmc28,1,48835,0.01965811682195147,2.142,15.112,0.5779486345653732 -priorityencoder,sky90,16,4796,0.2039340892410342,113.680002,45.471,15.458203964470394 -mux8,tsmc28,64,10,0.24052399999999352,206.513999,803.317,0.09717169599999739 -priorityencoder,sky90,32,3202,0.3089048094940662,286.160005,62.489,27.646980449718924 -comparator,sky90,64,1500,0.6527716666666666,1093.680021,113.509,110.38368883333332 -mux2,sky90,32,4877,0.2009620844781628,374.360008,257.036,133.84074826245643 -mux2,sky90,64,4230,0.2413916193853428,486.080007,135.616,165.11186765957447 -csa,tsmc28,32,23368,0.04244556384799726,124.992002,962.249,54.66988623622048 -comparator,sky90,64,3000,0.3577763333333333,1300.460025,318.158,175.66817966666665 -mux4,tsmc28,8,17855,0.055966720806496784,21.294,100.234,6.576089694763372 -mux4,sky90,16,4661,0.2587782347135808,443.940009,202.299,124.47233089723237 -mux2,tsmc28,16,18767,0.053046021580433735,15.75,88.025,5.1189410825118555 -mult,sky90,16,977,1.0235154534288637,7283.360133,4580.0,6243.444265916068 -priorityencoder,sky90,8,7522,0.1322263661260303,47.040001,18.57,10.221098101542143 
-mux8,sky90,8,3650,0.275701602739726,461.580008,275.08,100.90678660273971 -mux4,tsmc28,1,75000,0.038088333333333335,5.292,41.928,4.886733166666667 -add,sky90,32,3000,0.3884043333333333,1541.540029,817.761,791.1796269999999 -mux2,sky90,1,11565,0.0848227907479464,11.76,9.281,3.6558622812364896 -mult,sky90,128,503,1.9880695705765408,271535.463355,88700.0,235369.54453012726 -shiftleft,sky90,64,2500,0.461033,3849.440065,2670.0,1177.478282 -priorityencoder,sky90,64,3000,0.3329453333333333,588.000011,102.574,36.12456866666666 -comparator,sky90,128,2740,0.37589550364963503,2779.280054,939.943,371.00886210218977 -comparator,sky90,8,4643,0.21420998836958863,196.000004,116.123,46.91198745293991 -flop,sky90,64,8221,0.1143447031991242,1066.23999,520.0,1455.4365546700524 -mux2,tsmc28,64,10000,0.09951600000000001,58.842002,305.597,19.186684800000002 -csa,tsmc28,64,26351,0.04022622393837046,419.327997,3910.0,184.75904654893554 -csa,tsmc28,8,20000,0.049745000000000004,29.106,205.916,11.0682625 -add,sky90,16,2500,0.398809,514.50001,175.755,195.017601 -mux8d,sky90,1,7552,0.13242725423728816,85.260001,40.087,14.606726142372885 -mux8,tsmc28,8,13707,0.07518442423579193,51.281999,237.31,13.503122592748229 -comparator,tsmc28,64,15000,0.09390366666666666,274.554001,1420.0,96.25125833333333 -priorityencoder,sky90,128,3064,0.3263637571801567,1580.740029,511.083,110.31094992689296 -mux8,sky90,64,3239,0.32447026458783573,2216.760041,812.345,524.0194773093547 -mult,tsmc28,8,5610,0.19440611942959002,567.0,4250.0,420.50043632620327 -priorityencoder,sky90,8,7670,0.1250870964797914,50.960001,24.761,10.257141911342897 -flop,sky90,8,8692,0.11434432029452371,133.279999,64.8145,192.4014705435803 -priorityencoder,sky90,64,4263,0.24266358925639223,888.860015,460.967,85.90291059676285 -mux8d,tsmc28,1,20181,0.04936555839651157,11.718,79.832,3.7122899914176704 -priorityencoder,sky90,128,1000,0.998926,1112.300022,122.086,52.943078 
-comparator,sky90,8,5014,0.2153795636218588,244.020004,169.251,67.84456254088553 -comparator,tsmc28,128,5000,0.19976100000000002,321.677997,1000.0,77.50726800000001 -priorityencoder,tsmc28,8,30000,0.033051333333333335,8.946,35.541,1.8178233333333336 -mux2,sky90,64,4320,0.2418704814814815,485.100007,130.641,166.89063222222225 -priorityencoder,sky90,128,10,2.830100999999999,891.800017,56.107,1.1999628239999998 -flop,sky90,128,9795,0.11434390454313426,2132.4799805,1035.0,3467.9934528409904 -mux2,sky90,32,4674,0.21088850791613178,368.480007,245.044,132.6488714792469 -mult,sky90,8,2500,0.750058,2382.380044,1770.0,3108.9904099999994 -shiftleft,tsmc28,32,9392,0.10642459454855197,294.083999,1370.0,79.92487050596253 -mux8d,sky90,1,30000,0.13242133333333334,89.180001,52.758,62.63529066666666 -mux4,sky90,16,3000,0.3279513333333333,219.520004,18.013,48.27443626666666 -add,sky90,16,2658,0.3757077238525207,513.52001,174.271,202.50646315650866 -mux2,tsmc28,128,20000,0.059058,195.678,1280.0,73.172862 -csa,sky90,128,3000,0.2641803333333333,1881.599976,220.98,534.4368143333334 -shiftleft,tsmc28,8,5000,0.11473000000000001,15.624,51.647,2.9829800000000004 -csa,tsmc28,8,25000,0.040492,52.416,482.462,21.987156 -priorityencoder,sky90,32,3383,0.2949926251847472,275.380005,64.645,22.124446888856045 -flop,tsmc28,16,10000,0.048892000000000005,30.24,157.29,26.641250800000005 -mux4,tsmc28,8,17390,0.05743431282346176,19.656,88.061,6.099524021851638 -csa,tsmc28,128,25854,0.040894734431809396,806.399994,7580.0,363.5132943643537 -flop,sky90,8,8221,0.1143447031991242,133.279999,64.8145,181.99102961172608 -mux2,tsmc28,16,20202,0.0524700495000495,15.372,84.373,5.346698044055044 -csa,tsmc28,128,23865,0.04077636748376283,790.271996,7570.0,333.1021459748586 -shiftleft,sky90,64,2428,0.46200861449752884,3862.180062,2660.0,1248.347276372323 -mult,sky90,128,482,2.0746737966804982,251002.502634,63000.0,174440.64749869297 -mux4,tsmc28,16,16782,0.0609306534382076,40.95,193.54,12.91729852890001 
-shiftleft,sky90,16,10,0.8858720000000062,350.840007,23.053,0.4881154720000035 -comparator,tsmc28,16,13994,0.07279919679862798,54.432,268.994,14.945675102758324 -comparator,sky90,64,2945,0.3395135738539898,1335.740026,243.845,196.91787283531409 -mux4,tsmc28,128,12822,0.07797695304944627,296.477998,1450.0,95.75569834472002 -mux4,tsmc28,32,15168,0.0658052700421941,69.174,324.969,23.229260324894515 -priorityencoder,tsmc28,16,10,0.11891900000000533,10.962,32.132,0.0022713529000001017 -priorityencoder,tsmc28,32,5000,0.187693,26.334,75.077,3.6750289400000002 -mult,tsmc28,32,3155,0.32798221077654516,6954.947997,52200.0,7078.184090768621 -mux8,sky90,64,3267,0.3220962151821243,2250.080042,750.807,496.35026759565346 -add,sky90,64,2184,0.46074645787545787,2923.340057,1070.0,1044.512220003663 -add,sky90,32,2575,0.4100975145631068,1373.960026,614.613,581.1081781359223 -mux4,tsmc28,16,17810,0.06090523133071308,41.454,205.138,13.64277181807973 -csa,sky90,128,5984,0.16522529946524064,4897.060095,2990.0,2649.0572263262034 -flop,sky90,16,9445,0.11434412493382742,266.5599975,129.629,418.09929282054003 -mux2,tsmc28,64,20000,0.051763,99.036,577.742,31.0578 -mux4,sky90,32,5000,0.262987,543.900011,212.404,194.084406 -mux8,tsmc28,8,15000,0.07295366666666667,88.452,395.732,20.281119333333336 -comparator,tsmc28,128,8989,0.1112220797641562,476.28,1890.0,107.66297321170319 -add,tsmc28,8,14068,0.07184030963889679,34.020001,190.571,13.549082397895933 -comparator,tsmc28,64,10436,0.09578215408202377,251.748,991.197,57.27772814105021 -mux2,sky90,64,4354,0.2418708631143776,485.100007,130.641,168.10024986449247 -csa,tsmc28,8,10000,0.067577,17.136,57.229,4.8723016999999995 -shiftleft,sky90,64,2383,0.452526110365086,3978.800061,2870.0,1276.1236312295428 -mux2,tsmc28,1,52905,0.01965880512238919,2.142,15.112,0.6251500028919763 -flop,sky90,8,15822,0.11434413487548983,133.279999,64.8145,350.2360851236253 -add,tsmc28,8,5000,0.16307100000000002,19.908,102.357,6.0988554000000015 
-csa,sky90,32,6106,0.16536133770062234,1318.100026,832.166,624.0736884821488 -mux4,sky90,16,4302,0.23042302324500233,358.679999,417.161,98.85147697210601 -priorityencoder,tsmc28,8,28236,0.03511478127213486,6.048,21.019,1.365964991486046 -mux8,tsmc28,128,9823,0.10175189351521939,619.289998,2820.0,146.11571908785504 -mult,sky90,8,1444,0.7414307756232688,2439.220046,1840.0,1851.352646731302 -comparator,tsmc28,8,15000,0.06629966666666666,23.814,104.468,6.2454286 -priorityencoder,sky90,32,5236,0.20672748510313216,391.020008,222.23,54.369328582123764 -mux4,sky90,16,4185,0.23997962604540024,234.220005,76.845,67.26628918052569 -shiftleft,tsmc28,16,15000,0.08606166666666666,126.251999,625.471,45.44056 -mult,sky90,32,779,1.3274520474967908,26237.540459,16100.0,26468.06637503851 -mux2,tsmc28,8,27263,0.03986474911051609,11.466,61.225,3.4363413733264867 -mux8,tsmc28,1,16640,0.060018153846153846,9.072,51.135,2.616791507692308 -csa,tsmc28,128,24362,0.0409855330432641,838.655998,7690.0,347.4343636077498 -mux2d,tsmc28,1,53922,0.01893230618300508,3.276,26.574,0.9466153091502538 -flop,tsmc28,32,22356,0.04889172105922347,60.4799995,314.581,119.06600829552691 -mux2,sky90,32,5049,0.21904002158843336,237.160005,57.793,84.98752837631214 -mux2,sky90,32,4950,0.199897202020202,374.360008,259.372,135.53030296969698 -add,sky90,8,3428,0.2914162858809802,231.280005,126.487,106.94977691831973 -mux2,sky90,16,5616,0.20207367806267806,119.560002,32.354,44.132891288888885 -priorityencoder,sky90,128,3124,0.3200004327784891,1493.520029,372.988,91.2001233418694 -mux4,tsmc28,128,10000,0.098899,224.153997,821.731,67.152421 -mux8,sky90,32,2890,0.3453957612456747,985.880019,218.073,180.2965873702422 -mux2,tsmc28,128,10000,0.099897,123.480003,771.154,44.454165 -priorityencoder,tsmc28,16,25000,0.046222,26.334,100.815,5.3894852 -comparator,sky90,16,4432,0.24525976895306859,431.200006,339.523,112.57423394945846 -mux4,sky90,32,3879,0.2611894016499097,714.420014,329.878,193.2801572209332 
-add,tsmc28,32,9425,0.10609979575596817,184.212001,900.476,73.84545784615385 -comparator,tsmc28,8,17402,0.05747365923457074,42.336,233.903,11.747615947546262 -mux8d,sky90,1,8156,0.13242712211868563,91.140001,43.658,17.294982148700342 -csa,tsmc28,16,10000,0.067577,34.271999,114.459,9.737845700000001 -flop,tsmc28,16,20000,0.048892000000000005,30.24,157.289,53.2580556 -mux8,tsmc28,8,12611,0.07927785282689714,44.352,200.257,11.60627765385774 -flop,tsmc28,128,19044,0.048891976895610166,241.919998,1260.0,405.6078403259819 -mux8,sky90,64,3000,0.3334513333333333,2194.220042,693.108,451.82655666666665 -mux2,tsmc28,1,51887,0.019658650182126546,2.142,15.112,0.6133498856823483 -comparator,tsmc28,128,10327,0.10410854313934348,541.926002,2580.0,130.65622163987607 -flop,tsmc28,128,21114,0.048891939945060144,241.919998,1260.0,449.7080636146632 -priorityencoder,sky90,16,5868,0.17150381458759373,132.300003,66.355,18.488111212542606 -mux4d,tsmc28,1,35271,0.029485903830342206,6.426,53.388,2.0345273642936124 -mux8,tsmc28,64,10120,0.09832222924901185,288.287999,1400.0,81.90241696442686 -mux8d,sky90,1,7703,0.13242755082435415,89.180001,45.284,16.315074261560433 -mult,tsmc28,128,2020,0.522909504950495,49710.654218,308000.0,60818.03579227723 -mux4,tsmc28,1,24155,0.0400992962119644,2.898,17.89,0.8541150093148416 -mux8,sky90,128,2893,0.3507679426201175,4578.560081,1790.0,992.3225096723124 -mult,sky90,8,500,1.752451,1003.519999,119.994,216.4276985 -mux4,sky90,64,3699,0.2695173360367667,877.100017,304.149,274.9076827575021 -mux8,sky90,128,2780,0.3646542302158273,4614.820086,1880.0,984.2017673525179 -flop,sky90,32,13561,0.11434387456677236,533.119995,259.258,1200.5535110138267 -csa,sky90,8,10,0.2641830000000027,117.599998,13.811,0.11174940900000115 -mux4,sky90,16,4630,0.2543807213822894,327.320006,132.911,86.74382599136067 -mult,tsmc28,8,5000,0.19998100000000002,444.150001,3260.0,306.970835 -mux4d,sky90,1,9701,0.10307715647871353,51.940001,49.18,13.626800086485927 
-add,tsmc28,128,7510,0.13762179227696406,939.330009,4610.0,369.37689047137155 -add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2147.9741690795754 -csa,sky90,32,10,0.2641830000000027,470.399994,55.245,0.4448841720000046 -mux8,tsmc28,1,15360,0.06469516666666666,6.804,32.934,1.8444592016666665 -mux8,tsmc28,8,5000,0.120592,30.114,99.721,3.5815824000000003 -mult,tsmc28,8,10000,0.195858,517.356,3650.0,683.3485619999999 -shiftleft,tsmc28,64,7773,0.1286454567091213,641.717996,2950.0,188.72288499228097 -comparator,tsmc28,32,13374,0.08571594556602362,133.686,630.714,37.7150160490504 -mult,tsmc28,128,2058,0.5220186491739554,51424.506238,321000.0,64453.12059485909 -mux4,tsmc28,8,20000,0.057128000000000005,28.476,137.138,8.4035288 -priorityencoder,sky90,8,7035,0.14136441080312723,47.040001,23.417,10.135828254584222 -flop,sky90,64,9795,0.11434390454313426,1066.23999,520.0,1734.1396563011742 -priorityencoder,tsmc28,16,19904,0.050228157556270095,17.514,56.726,3.289944319935691 -shiftleft,tsmc28,64,1000,0.409802,270.395999,883.096,37.783744399999996 -mux2,sky90,128,4170,0.25730815347721825,1269.100023,748.523,473.4470023980815 -priorityencoder,tsmc28,16,5000,0.11892000000000001,10.962,32.132,1.1333076 -priorityencoder,tsmc28,8,10,0.05780799999999431,4.284,14.558,0.0004983049599999509 -mux2d,sky90,1,13217,0.07565913467503972,19.6,18.562,6.03759894706817 -mult,sky90,64,10,15.014533999999998,46801.860227,5460.0,2921.8283163999995 -priorityencoder,sky90,64,4180,0.25021144976076554,817.320015,379.7,76.56470362679427 -mux2,sky90,128,3920,0.25878504081632653,955.500018,279.611,313.9062545102041 -mux8,tsmc28,16,5000,0.14483200000000002,55.314,186.553,8.2988736 -mux8,sky90,64,3082,0.34575863335496426,2242.240042,748.799,474.7266035963659 -mux4,sky90,8,5130,0.2207507738791423,203.840004,102.281,52.91396049883042 -flop,sky90,16,3000,0.11434433333333333,266.5599975,129.629,132.80522594999996 -shiftleft,sky90,8,3000,0.33269333333333334,141.120003,30.136,37.294922666666665 
-flop,sky90,128,8221,0.1143447031991242,2132.4799805,1035.0,2910.644419933706 -flop,sky90,8,8856,0.1143437958446251,133.279999,64.8145,196.03100359602527 -comparator,tsmc28,64,11288,0.0938496527285613,291.06,1290.0,72.45193190644933 -mux4,tsmc28,8,16796,0.05895498523457966,18.27,77.583,5.683260576613479 -comparator,tsmc28,128,10136,0.10660024782951855,543.312001,2520.0,135.06251400000002 -mux2,sky90,64,3430,0.28852518950437317,448.840009,147.731,164.4593580174927 -comparator,sky90,32,4309,0.27354740659085636,783.020014,417.236,160.57232766883266 -mux8,sky90,16,3559,0.2891288027535825,618.380012,225.209,131.26447645012647 -shiftleft,tsmc28,32,10174,0.10280675820719481,358.596,1780.0,107.74148260114016 -mux4,sky90,128,3000,0.33169133333333334,1739.500034,538.522,538.9984166666667 -csa,tsmc28,64,23368,0.04244556384799726,249.984005,1920.0,109.29732690859295 -flop,sky90,8,14974,0.11434442286630159,133.279999,64.8145,331.4844818894083 -mult,tsmc28,64,2585,0.4134561953578336,18989.46002,133000.0,23198.613665332687 -mux4,tsmc28,1,26780,0.0380882994772218,5.292,41.928,1.7444441160567585 -mux8,tsmc28,1,14720,0.06778378260869565,6.426,24.602,1.8443967247826085 -add,sky90,32,2369,0.42146203756859435,1269.100025,518.794,483.83841912874635 -mux8,sky90,8,3723,0.2746425909213,422.380008,228.3,98.87133273166799 -priorityencoder,sky90,16,5510,0.18039020326678767,120.540002,52.321,16.505703598911072 -mux4,sky90,128,3425,0.304276802919708,2258.900043,886.735,674.2773952700729 -flop,tsmc28,16,19458,0.048891743344639735,30.24,157.289,51.81791418381642 -mux2,sky90,8,5280,0.1887229393939394,63.700001,23.506,19.476207345454547 -mux2,sky90,32,4554,0.21088917610891522,368.480007,245.044,129.27506495476504 -mult,sky90,128,250,3.999108,188831.301177,20100.0,78018.597972 -flop,tsmc28,8,21528,0.04889213340765514,15.12,78.635,28.675236243589737 -mux8,sky90,32,3029,0.3299529610432486,1271.060004,619.714,330.2829140042919 -mux4,tsmc28,8,10000,0.08076,15.12,51.529,3.303084 
-csa,tsmc28,128,25357,0.04036684189770083,806.399994,7610.0,349.25391609890767 -shiftleft,tsmc28,16,13030,0.08248097083653108,126.251999,604.042,36.45658910974674 -add,tsmc28,128,15000,0.13714166666666666,974.106009,4840.0,761.8219583333332 -csa,sky90,32,5862,0.16964724223814398,1306.340025,907.673,644.320226020471 -mux8,sky90,16,3419,0.2915101822170225,588.000006,280.193,150.71076420620065 -flop,tsmc28,128,15000,0.04889166666666667,241.919998,1260.0,319.48259583333333 -mux2,tsmc28,64,15000,0.06486766666666667,66.780001,502.862,24.909184 -priorityencoder,sky90,32,5042,0.20717899444664814,403.760008,230.606,55.10961252280841 -csa,tsmc28,16,26848,0.04022672228843861,104.831999,978.516,47.18594524433849 -mux2,sky90,64,3570,0.2747030448179272,451.780009,153.75,165.92063907002802 -csa,tsmc28,16,30000,0.040226333333333336,100.799999,946.62,51.208122333333336 -mult,tsmc28,8,20000,0.19250299999999998,618.156001,4560.0,1558.5042879999999 -shiftleft,tsmc28,32,10000,0.102217,374.85,1790.0,106.71454800000001 -add,sky90,8,3013,0.3308971211417192,198.940004,66.324,77.4299263471623 -shiftleft,sky90,64,3000,0.47321633333333335,3605.42006,2240.0,1321.2200026666667 -priorityencoder,sky90,32,2839,0.3493967030644593,246.960005,41.044,19.042120317013033 -csa,sky90,128,5740,0.16671402787456446,4641.28009,3300.0,2283.315325770035 -comparator,tsmc28,8,17054,0.05854826984871585,32.256,160.477,8.752966342383019 -priorityencoder,sky90,32,4585,0.21805850817884406,366.520007,180.82,41.97626282442749 -flop,sky90,32,8221,0.1143447031991242,533.119995,259.258,727.746863510826 -mux4,sky90,64,3280,0.3043220487804878,1098.580021,291.464,250.45704614634147 -priorityencoder,sky90,32,3797,0.2610688151171978,258.720005,71.643,21.981994232868058 -shiftleft,tsmc28,128,7164,0.14037182300390844,2240.027998,10800.0,643.0433211809045 -mux2,tsmc28,128,5000,0.18098,114.408004,584.652,35.2911 -add,sky90,32,10,3.640808000000007,456.679995,55.753,5.836215224000011 
-mux2,sky90,16,4512,0.21640220567375884,114.660002,40.216,41.57086370992907 -priorityencoder,tsmc28,32,18132,0.05875811405250386,67.41,246.647,10.552957283829693 -shiftleft,sky90,64,2000,0.499505,2896.880055,1130.0,677.3287799999998 -flop,tsmc28,128,22356,0.04889172105922347,241.919998,1260.0,476.180917256307 -mult,sky90,128,596,1.9076103489932885,308170.804333,146000.0,374664.21059402684 -priorityencoder,tsmc28,32,10,0.24741400000000624,25.326,69.772,0.009748111600000246 -mux2d,tsmc28,1,50000,0.019658000000000002,2.142,15.112,0.5917058000000001 -priorityencoder,sky90,64,10,1.535827999999995,436.100008,29.529,0.34847937319999883 -flop,tsmc28,64,10,0.048889000000002625,120.959999,630.0,0.10640935295000573 -csa,sky90,64,6594,0.1653620178950561,2947.840057,1740.0,1386.7258820679403 -mux2,sky90,1,12288,0.08300820833333333,13.72,12.3,3.9843940000000004 -priorityencoder,sky90,16,1000,0.660731,85.260002,6.292,4.763870509999999 -priorityencoder,sky90,64,2883,0.345291908775581,590.940012,95.886,34.77089521370101 -mult,tsmc28,8,10,0.5050959999999947,142.631998,499.628,0.32073595999999666 -mult,tsmc28,128,1944,0.5260002921810699,48099.11423,294000.0,56298.86327272428 -priorityencoder,sky90,16,6481,0.1666291763616726,185.220003,132.902,35.87526167066811 -comparator,sky90,128,3090,0.3677285954692556,2822.400054,890.508,395.6759687249191 -comparator,sky90,64,3190,0.33052562382445144,1324.960026,402.199,187.07750308463952 -flop,tsmc28,32,21114,0.048891939945060144,60.4799995,314.5805,112.44412808264657 -shiftleft,sky90,8,4321,0.23108991020597083,250.880004,181.951,70.25133270261513 -flop,sky90,32,5000,0.11434400000000002,533.119995,259.258,442.6256240000001 -mux2,sky90,16,4794,0.20207407592824364,119.560002,32.354,37.72722997580309 -mult,sky90,16,250,3.988552,3676.960008,419.503,1376.0504399999998 -shiftleft,tsmc28,64,10,0.40777599999999836,270.395999,887.005,0.3213274879999988 -mux2,tsmc28,8,5000,0.08170100000000001,7.56,38.026,0.9836800400000001 
-csa,sky90,8,6594,0.1653620178950561,368.480007,219.63,174.4569288792842 -mux8,tsmc28,8,13433,0.07609853457902181,46.368,224.828,12.396451282922651 -flop,tsmc28,32,19044,0.048891976895610166,60.4799995,314.5785,101.42885066878809 -add,sky90,8,1000,0.935737,130.340001,20.228,39.6752488 -comparator,sky90,32,3963,0.2725030903356043,795.760015,403.768,138.15906680015138 -mux8,tsmc28,128,11105,0.095745527239982,640.709998,3230.0,181.9165017559658 -mux8,tsmc28,64,9874,0.10113207859023698,338.687995,1560.0,80.60226663641886 -priorityencoder,tsmc28,128,25000,0.088162,299.628,938.146,85.60530200000001 -comparator,sky90,16,1000,0.80105,253.820005,31.496,21.78856 -comparator,sky90,32,3670,0.27335356403269756,702.660013,336.839,110.70819343324253 -csa,tsmc28,32,15000,0.06424366666666667,68.543999,229.716,28.07448233333334 -mult,sky90,8,1337,0.7566541563201197,2445.100044,1850.0,1641.1828650583395 -priorityencoder,sky90,8,8643,0.12521256693277796,64.680001,49.589,15.839389716996413 -priorityencoder,sky90,64,3064,0.3243807571801567,637.980011,153.244,43.110202629242835 -priorityencoder,sky90,8,10,0.47602799999999945,34.300001,2.489,0.020369238119999977 -mux4,tsmc28,64,15365,0.07022098080052067,145.151999,709.17,51.05065304197852 -mult,sky90,8,250,2.0311909999999997,1008.419998,129.045,110.69990949999998 -priorityencoder,tsmc28,16,1000,0.11892000000000003,10.962,32.132,0.22713720000000004 -inv,tsmc28,1,100000,0.014039,0.504,4.002,0.5508903599999999 -mux2,sky90,128,4004,0.24974824975024976,1302.420025,767.078,466.52973053346653 -csa,tsmc28,8,25357,0.04036684189770083,50.4,475.36,21.878828308553853 -mux8,tsmc28,64,10000,0.099779,286.649999,1380.0,82.118117 -flop,tsmc28,64,30000,0.048891333333333335,120.959999,630.0,319.50486333333333 -comparator,sky90,32,3000,0.33282433333333333,709.520014,191.954,96.85188100000002 -mux2,sky90,32,5080,0.21904039370078743,237.160005,57.793,85.64479393700789 -priorityencoder,sky90,32,4393,0.2258208736626451,305.760006,127.875,32.60853415688596 
-mux2,tsmc28,32,10000,0.092725,28.854001,147.552,8.595607500000002 -mult,tsmc28,64,5000,0.41003500000000004,19819.80001,141000.0,46252.768070000006 -flop,tsmc28,128,19458,0.048891743344639735,241.919998,1260.0,414.43086246083874 -priorityencoder,sky90,16,5881,0.16984310899506885,137.200003,75.195,21.196420002584592 -mult,sky90,16,1036,1.0221289652509653,7598.920137,5000.0,7157.96914365251 -mult,sky90,64,1000,1.5798130000000001,90854.821458,52800.0,161753.89344400002 -mux2d,tsmc28,1,47818,0.020583627044209293,2.016,13.728,0.5783999199422811 -add,tsmc28,64,9040,0.11949846902654868,475.776003,2420.0,197.53096930088495 -add,sky90,64,1500,0.6623996666666666,1876.700037,351.887,555.7533203333332 -priorityencoder,sky90,32,3000,0.33104633333333333,283.220006,50.998,20.591081933333335 -mux2,sky90,8,10,0.41516099999999767,58.800001,10.206,0.07414775459999959 -add,sky90,8,3193,0.31265709238960226,221.480004,84.937,81.91615820607579 -add,tsmc28,16,10,0.4766620000000046,32.886,116.238,0.07459760300000072 -mux4,sky90,8,5128,0.22075080031201247,203.840004,102.281,52.89189175475819 -mux8,sky90,64,3144,0.33161415776081427,2234.400043,750.071,464.25982086513994 -mult,tsmc28,16,10,1.0536270000000059,539.405992,1830.0,3.8773473600000217 -mux4,sky90,64,3768,0.2730587813163482,889.840017,328.011,290.2614845392781 -add,tsmc28,32,10387,0.1022801888899586,220.878002,1160.0,90.4156869787234 -flop,sky90,16,9095,0.11434452226498075,266.5599975,129.629,402.6070628949972 -priorityencoder,tsmc28,8,45000,0.03335422222222222,7.686,30.147,2.3214538666666664 -priorityencoder,sky90,16,10000,0.174027,188.160003,138.994,58.99515299999999 -mux2,tsmc28,1,10,0.035370000000000346,0.882,4.502,9.337680000000092e-05 -priorityencoder,tsmc28,8,30078,0.033050891415652636,8.946,35.541,1.8244092061440254 -mux4,tsmc28,8,19980,0.057128050050050054,28.476,137.138,8.392110552352353 -add,tsmc28,64,8035,0.12444750715619167,392.616003,1890.0,149.95924612321096 
-add,sky90,16,2558,0.3907014143862393,488.040009,139.585,180.89475486082878 -mult,sky90,128,493,2.028396565922921,264363.823149,77300.0,208790.97211670992 -mux4d,sky90,1,10000,0.103077,51.940001,49.18,14.059702799999998 -mult,tsmc28,8,5091,0.19642506383814576,516.222001,3840.0,342.9581614614025 -mux2,tsmc28,16,19059,0.05221864998163597,15.75,88.448,5.133093293194816 -shiftleft,sky90,128,1831,0.5460426450027308,8026.200142,4230.0,2303.7539192665217 -mult,sky90,8,1283,0.7790382268121591,2050.160038,1240.0,1337.608635436477 -shiftleft,tsmc28,8,14880,0.06714730107526881,43.218,291.223,14.020356464516128 -priorityencoder,tsmc28,32,17461,0.05890548851726705,66.15,236.198,10.184758964635472 -comparator,sky90,32,4504,0.27431686678507994,750.680015,350.972,141.82182012788633 -shiftleft,sky90,32,2848,0.38303659550561797,1805.160032,1200.0,530.1226481797753 -shiftleft,sky90,8,4585,0.22936450817884405,246.960005,191.922,69.03871696183205 -add,sky90,128,10,13.960425999999998,1867.879976,221.488,91.16158177999999 -mux2,tsmc28,16,20682,0.05247022328594913,15.372,84.373,5.472644288724495 -add,sky90,128,1845,0.5417794200542005,5779.060112,1800.0,1762.9502328563688 -mux4,tsmc28,128,12555,0.0795745420151334,286.901998,1350.0,90.95370152329748 -mult,sky90,128,10,29.683678,180759.040854,18000.0,23182.952518 -comparator,sky90,8,10,0.7786030000000039,118.580002,16.053,0.10402136080000053 -mux2,tsmc28,32,20000,0.055856,32.130001,171.146,11.0483168 -mux8,tsmc28,64,11660,0.09174329331046312,303.407999,1480.0,89.35796768439107 -mux8,tsmc28,1,17280,0.06011037037037037,9.072,57.908,2.909341925925926 -mux2,tsmc28,8,27000,0.04027303703703704,10.332,86.166,4.244778103703704 -mux2,sky90,16,10,0.6069949999999977,113.680002,19.729,0.23430006999999914 -priorityencoder,sky90,32,5158,0.20654259441644046,395.920008,214.865,53.0814467650252 -add,sky90,128,1500,0.6662036666666666,5078.360098,1260.0,1444.3295493333335 -priorityencoder,sky90,8,8483,0.12521182447247436,64.680001,49.589,15.538787417034069 
-priorityencoder,tsmc28,128,11673,0.08624078034781119,291.312,929.579,39.15331427790628 -inv,sky90,1,50000,0.068855,3.92,5.959,15.354665 -mux4,tsmc28,64,5000,0.19659000000000001,107.099999,386.787,35.268246000000005 -flop,tsmc28,32,10000,0.048892000000000005,60.4799995,314.5805,53.2580556 -mux2,tsmc28,128,1000,0.31761799999999996,113.274004,578.362,15.086854999999998 -mux4,sky90,8,4655,0.21455177121374866,159.740002,86.462,42.03069198077337 -priorityencoder,tsmc28,64,14889,0.07169967788300087,148.932001,494.94,21.43820368701726 -priorityencoder,tsmc28,32,25000,0.060377,67.284001,248.946,15.275381 -flop,tsmc28,64,1000,0.04889200000000005,120.959999,630.0,10.64158826000001 -mux4,tsmc28,32,15000,0.06644566666666667,77.994,373.738,25.44869033333333 -mux8,tsmc28,16,11506,0.08617617677733357,76.86,331.206,19.131111244568054 -inv,tsmc28,1,30000,0.014172333333333332,0.252,1.005,0.043877544 -priorityencoder,tsmc28,32,16118,0.06203743702692642,56.574,177.458,8.561166309715848 -csa,sky90,64,3000,0.2641803333333333,940.799988,110.49,267.086317 -flop,sky90,8,8571,0.11434450029168125,133.279999,64.8145,189.7718499090888 -flop,sky90,8,10000,0.114344,133.279999,64.8145,221.4100044 -csa,sky90,16,5862,0.16847624223814398,528.22001,303.743,264.6761765561242 -priorityencoder,sky90,64,10000,0.239263,934.920017,532.197,247.397942 -mult,tsmc28,16,3897,0.25939064690787783,1872.486,13900.0,1593.6961346020014 -mux2,tsmc28,128,5000,0.18098,114.408004,584.652,35.2911 -mux4,tsmc28,128,14424,0.07559189628397116,369.053996,1850.0,115.80678510704381 -csa,tsmc28,64,22871,0.04244549263259149,249.984005,1920.0,107.00508692676314 -flop,tsmc28,16,22356,0.04889172105922347,30.24,157.29,59.53300414776346 -priorityencoder,sky90,128,3561,0.2874549943836001,1717.940032,652.843,140.56549225358046 -priorityencoder,sky90,128,3218,0.3117350198881293,1546.44003,459.68,106.30164178185208 -mux4,tsmc28,1,25730,0.0384481379712398,4.158,30.449,1.2784005875437234 
-add,sky90,128,1000,0.998732,3454.500067,498.823,856.912056 -flop,sky90,32,1000,0.114344,533.119995,259.258,88.5365592 -mux8,sky90,1,7200,0.1873938888888889,78.400002,53.42,25.598005222222223 -mux4,sky90,32,3726,0.2671803263553409,447.860009,138.381,135.9947861148685 -csa,tsmc28,32,5000,0.067577,68.543999,229.117,9.737845700000001 -mux2,tsmc28,128,15578,0.06391209282321222,145.782001,1020.0,51.83270727962511 -mult,tsmc28,128,1791,0.5583472920156337,40650.246227,230000.0,43608.59854829704 -mux4d,sky90,1,20000,0.103077,51.940001,49.18,28.140021 -mux2,tsmc28,8,1000,0.08170100000000002,7.56,38.026,0.19689941000000005 -mux4,sky90,16,4803,0.2558442063293775,359.660007,120.182,90.56884904059963 -mux2,sky90,64,4060,0.24566741871921183,514.50001,165.954,163.6145008669951 -flop,tsmc28,8,20700,0.04889217874396135,15.12,78.634,27.560521157971014 -priorityencoder,sky90,16,5306,0.18675988767433094,119.560002,50.466,16.098702317527326 -csa,sky90,128,5617,0.17789797739006588,4641.28009,3300.0,2384.722386913833 -shiftleft,tsmc28,16,12292,0.08241872600065082,142.001999,686.111,38.48954504230393 -priorityencoder,sky90,64,3124,0.3200564327784891,644.840013,160.041,46.056120676824584 -inv,tsmc28,1,50000,0.015147,0.378,2.478,0.27658422000000005 -mux2,tsmc28,8,32005,0.033745117950320265,16.758,114.743,5.881774058740822 -mux2,tsmc28,1,75000,0.019658333333333333,2.142,15.112,0.8885566666666668 -comparator,sky90,128,3397,0.3513173918163085,2877.280054,1140.0,450.38889630850747 -mux2d,sky90,1,12953,0.0756591925422682,19.6,18.562,5.908982937551147 -priorityencoder,sky90,8,7362,0.1356376541700625,49.980001,33.776,9.264051779815269 -mux4,tsmc28,128,10,0.3817059999999941,212.561998,731.833,0.27979049799999567 -shiftleft,sky90,8,4144,0.2403047413127413,215.600004,131.982,54.86157244169883 -mux2,tsmc28,128,18287,0.05796565505550391,197.316,1160.0,62.892735735221734 -inv,sky90,1,30000,0.03425433333333333,2.94,3.973,1.2961839733333331 
-mux2,sky90,8,5610,0.18835111942959,64.680001,21.541,20.417261346167557 -comparator,sky90,16,4181,0.2459612303276728,417.480006,282.659,88.05412045730687 -add,sky90,128,2002,0.5286264995004996,6468.000119,2440.0,2366.6608382637364 -priorityencoder,sky90,8,7646,0.13037333978550877,49.000001,18.712,10.364680512947947 -mult,tsmc28,128,1830,0.546448087431694,42308.910244,246000.0,46647.54098360657 -mux4d,tsmc28,1,31880,0.030080628607277293,4.158,30.468,1.2315009351819326 -mult,sky90,32,716,1.3966020446927374,21648.200355,9490.0,19193.50190021229 -mux8,tsmc28,1,17409,0.06011055321959906,9.072,57.908,2.9213728864725144 -priorityencoder,tsmc28,128,11439,0.0874122290410001,315.252,980.365,40.908923191188045 -shiftleft,sky90,8,4409,0.259935800181447,218.540004,115.4,66.54356484645044 -mult,tsmc28,32,3095,0.3276207770597738,6523.271993,49500.0,6578.952824137317 -priorityencoder,sky90,16,5000,0.195991,109.760002,39.953,13.386185300000001 -mux8,sky90,128,2723,0.3825720124862284,4173.82008,1210.0,830.563839107602 -flop,sky90,64,8571,0.11434450029168125,1066.23999,520.0,1517.408691120756 -priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,126.1332569785861 -mux4,sky90,8,4940,0.21053714979757085,164.640002,96.679,46.08658209068825 -mux8,tsmc28,32,12183,0.08988458909956497,202.104,984.173,51.50386955405072 -inv,tsmc28,1,60000,0.014172666666666667,0.252,1.005,0.08787053333333335 -mux2,sky90,32,4752,0.20550371043771046,369.460007,247.031,131.5223746801347 -mux8,sky90,1,5123,0.19421012609798946,76.440001,48.842,14.293865280812025 -comparator,tsmc28,128,1000,0.606101,256.787999,855.613,34.8508075 -priorityencoder,sky90,128,3270,0.3057743975535168,1555.26003,544.469,111.30188070948012 -mult,tsmc28,128,20000,0.525663,48838.986242,299000.0,586475.901144 -mux2,tsmc28,16,18384,0.053569126196692775,15.246,84.024,4.987285648912098 -add,tsmc28,64,7868,0.12709710218607015,371.826002,1690.0,136.88357905439756 
-mult,sky90,16,1076,0.9934390297397769,8161.440149,6040.0,7758.758822267659 -mux2,tsmc28,64,20092,0.05297405315548477,87.318,456.882,26.275130365120447 -flop,tsmc28,32,19458,0.048891743344639735,60.4799995,314.5785,103.63582836763284 -mux8,tsmc28,128,11318,0.09646083300936562,644.111997,3320.0,189.64199769641283 -mux4,tsmc28,8,18130,0.05509219801434087,27.971999,133.963,8.021424030888031 -mult,sky90,16,500,1.999668,3913.140023,588.109,1415.764944 -priorityencoder,sky90,32,2658,0.3727227238525207,257.740005,46.003,21.133378442437923 -add,sky90,32,2781,0.40169588385472854,1447.460027,608.913,689.711832578569 -priorityencoder,sky90,64,3249,0.30759801138811943,645.820012,155.925,41.218133526008 -flop,sky90,32,8920,0.11434462331838566,533.119995,259.258,789.7211409484305 -priorityencoder,sky90,128,3618,0.2948967987838585,1770.860032,685.479,152.46164497125483 -mux8,sky90,64,3179,0.3292943284051589,2232.440042,845.289,511.72338634161696 -add,sky90,32,2500,0.42241700000000004,1399.440026,589.417,588.8492980000001 -mult,tsmc28,128,1982,0.5204548678102926,51085.31424,319000.0,61317.91070593744 -priorityencoder,sky90,128,5000,0.306166,1653.260031,552.149,174.51462 -flop,sky90,128,10000,0.114344,2132.4799805,1035.0,3540.66196 -flop,tsmc28,32,21272,0.048892154193305754,60.4799995,314.5805,113.28801048130876 -shiftleft,tsmc28,8,14576,0.06860292755214051,48.132,364.308,16.896901056092208 -csa,tsmc28,8,5000,0.067577,17.136,57.229,2.4395297 -mux4,tsmc28,128,20000,0.074873,399.041992,1550.0,145.25362 -priorityencoder,sky90,128,2764,0.361354500723589,1401.400027,294.34,77.00464410419681 -shiftleft,sky90,32,2850,0.3779011929824561,1791.440029,1220.0,525.6605594385965 -inv,tsmc28,1,100000,0.009913,0.252,1.094,0.11251255 -shiftleft,tsmc28,16,10000,0.09990500000000001,65.772,298.445,20.390610500000005 -mux2d,sky90,1,50000,0.075659,19.6,18.562,22.773359 -comparator,sky90,8,4922,0.20578444331572532,205.800004,120.669,48.359344179195446 
-mult,sky90,64,696,1.5519606091954021,95740.121609,58700.0,114851.29292289655 -priorityencoder,tsmc28,32,16789,0.060875808982071594,65.52,244.211,10.342799946053963 -mux4,tsmc28,1,25205,0.038448667724657805,4.158,30.449,1.2534265678238443 -shiftleft,sky90,32,2743,0.3787993456069996,1930.600031,1440.0,596.9877686766314 -csa,tsmc28,8,24362,0.0409855330432641,52.416,480.368,21.763318045973236 -flop,tsmc28,8,40000,0.048892000000000005,15.12,78.635,53.2849462 -mux4,sky90,8,5033,0.22075065487780648,203.840004,102.281,51.92055402726009 -mux2,sky90,1,50000,0.083009,13.72,12.3,16.253162200000002 -flop,tsmc28,8,10,0.048889000000002625,15.12,78.6345,0.013320296940000717 -flop,sky90,32,10000,0.114344,533.119995,259.258,885.30842 -mult,tsmc28,32,5000,0.32994100000000004,6358.967994,46700.0,10586.486926000001 -priorityencoder,sky90,64,2943,0.3384623306150187,601.720012,105.802,39.60009268195718 -priorityencoder,tsmc28,32,1000,0.24741400000000002,25.326,69.772,0.97481116 -add,tsmc28,32,9233,0.10822115910321672,178.920002,876.315,74.23971514480667 -mux4,sky90,16,4213,0.2508225506764776,283.220006,87.245,76.50087795632565 -add,tsmc28,16,10842,0.09216790518354548,69.552001,341.508,25.5305097358421 -priorityencoder,sky90,32,4489,0.2227077631989307,319.480006,152.013,34.6978695063934 -priorityencoder,sky90,32,2983,0.3310469869259135,283.220006,50.998,20.657331984177002 -mux8,tsmc28,8,10,0.11437999999999704,30.114,111.042,0.005856255999999848 -mux2,sky90,64,3220,0.3045710062111801,448.840009,132.775,160.20434926708074 -mux8d,tsmc28,1,20000,0.048887,6.426,35.526,1.7452659 -shiftleft,sky90,32,2689,0.3990644592785422,1775.760029,1170.0,549.9108248858312 -flop,sky90,16,9270,0.11434386515641856,266.5599975,129.629,410.32296011380805 -priorityencoder,sky90,64,3312,0.3015083671497585,648.760012,157.266,42.03026638067634 -mux8,sky90,128,2836,0.3556033088857546,4469.780085,1660.0,952.6612645049366 -mux8,sky90,16,3349,0.2980415959988056,656.600002,320.715,162.43266981934903 
-add,tsmc28,64,8538,0.12130544811431249,441.630003,2230.0,171.52590363363788 -mux2,sky90,128,3837,0.26133227625749283,1385.720006,833.846,492.35000846911646 -flop,tsmc28,64,19872,0.04889206119162641,120.959999,630.0,211.6292868679549 -shiftleft,tsmc28,16,12784,0.08281477847309136,125.621999,678.965,37.10102075594493 -mux2,sky90,64,4143,0.24532898720733767,472.360009,114.673,154.80259092783007 -mux4d,sky90,1,9507,0.10386665267697485,50.960001,47.194,13.315704873188174 -mult,sky90,128,500,1.999997,268692.483349,81600.0,219863.67020400002 -add,tsmc28,8,1000,0.25532200000000005,15.75,58.809,1.8204458600000002 -csa,sky90,16,6106,0.16536133770062234,650.720013,413.676,311.54076022797244 -mult,tsmc28,32,10,2.1397929999999974,1770.426003,6970.0,31.15538607999996 -mux4,tsmc28,16,10000,0.09861500000000001,28.224,100.845,8.569643500000002 -csa,tsmc28,64,5000,0.067577,137.087997,458.434,19.462176 -add,tsmc28,32,10,0.9349590000000063,67.157999,230.644,0.30292671600000204 -comparator,sky90,64,3068,0.3699802411994785,1251.460024,221.953,160.94140492177314 -mux8,sky90,64,10,1.3133009999999956,1610.140031,110.909,3.6772427999999877 -priorityencoder,sky90,32,3187,0.3132987097583935,280.280005,47.039,18.265314778914345 -priorityencoder,tsmc28,16,21635,0.04622240050843541,22.428,78.51,3.956637483522071 -flop,tsmc28,16,19044,0.048891976895610166,30.24,157.289,50.72787062804033 -mult,tsmc28,128,10000,0.52686,49355.208205,302000.0,297905.0841 -flop,tsmc28,16,21114,0.048891939945060144,30.24,157.29,56.23550932480817 -mux2,sky90,16,3840,0.2378916666666667,115.640002,38.479,38.609817500000005 -flop,tsmc28,128,10,0.048889000000002625,241.919998,1260.0,0.21305826200001143 -flop,sky90,8,8036,0.11434401991040319,133.279999,64.8145,177.89642617660525 -shiftleft,tsmc28,128,6883,0.1452804859799506,1687.769984,7890.0,497.1498230233909 -mux2,sky90,16,5392,0.20207394065281897,119.560002,32.354,42.33449056676558 -mux2,sky90,64,1000,0.962122,442.960009,77.324,165.1001352 
-flop,sky90,64,10,0.1143419999999935,1066.23999,520.0,1.7717864609998994 -mux4d,tsmc28,1,33914,0.029486347820958898,6.426,53.381,1.9667393996579585 -mux8,sky90,32,3205,0.31692848049921996,1063.300021,372.61,231.67471924492978 -priorityencoder,tsmc28,8,31919,0.035221302296437856,9.198,37.469,2.081578965719477 -mux2,tsmc28,32,18619,0.05585557725978839,32.130001,171.146,10.294182888978998 -mux4d,tsmc28,1,40000,0.029486000000000002,6.426,53.388,2.3058052 -inv,tsmc28,1,150000,0.014038666666666668,0.504,4.002,0.8254736000000001 -mux4,sky90,32,1000,0.864849,423.360008,27.847,87.1767792 -flop,tsmc28,16,15000,0.04889166666666667,30.24,157.29,39.93715791666667 -mux2,sky90,64,3360,0.2885260476190476,448.840009,147.731,161.2860606190476 -priorityencoder,tsmc28,128,11206,0.0892179082634303,274.428001,811.403,34.08124095663038 -mux2,tsmc28,32,17903,0.05585556035301346,32.130001,171.146,9.897605294553983 -mux8,sky90,1,5550,0.17963118018018018,74.480001,41.815,14.280678824324324 -add,sky90,64,2095,0.486730968973747,2797.900054,769.441,787.5307077995226 -add,sky90,32,2872,0.41581841504178274,1443.540028,617.001,699.8223925153203 -add,tsmc28,32,9041,0.11059323371308484,179.928002,878.301,74.53983952261918 -add,sky90,16,2931,0.33991248447628797,623.280012,352.919,268.5308627362675 -csa,tsmc28,8,25854,0.040894734431809396,50.4,473.705,22.819261812949645 -comparator,tsmc28,16,15137,0.07298328863050803,66.78,308.04,19.851454507498183 -mux2,tsmc28,1,50000,0.019658000000000002,2.142,15.112,0.5917058000000001 -csa,tsmc28,128,25000,0.040492,838.655998,7720.0,351.47056 -flop,sky90,128,8571,0.11434450029168125,2132.4799805,1035.0,3034.5315209907826 -csa,tsmc28,8,23865,0.04077636748376283,49.392,473.393,20.91827651917033 -mux8,sky90,8,3942,0.26753433587011666,364.560007,157.506,84.00578146321664 -shiftleft,tsmc28,16,20000,0.08135300000000001,134.064,655.511,58.16739500000001 -mux2,tsmc28,128,10000,0.099897,123.480003,771.154,44.454165 
-mux4,tsmc28,32,5000,0.14417800000000003,54.431999,184.849,11.534240000000002 -mux4,tsmc28,128,10000,0.098899,224.153997,821.731,67.152421 -priorityencoder,tsmc28,16,23366,0.04534722673970727,30.24,119.671,5.691076955833263 -mux8d,sky90,1,7099,0.1341249105507818,85.260001,40.078,14.405015393153965 -shiftleft,sky90,128,1793,0.5575724841048522,7695.940136,3730.0,2006.703370293363 -priorityencoder,tsmc28,16,22933,0.04644428496053722,26.712,104.46,5.062427060698557 -priorityencoder,sky90,32,3390,0.2909642507374631,289.100005,59.434,24.61557561238938 -add,sky90,128,2042,0.5237349647404506,6555.220126,2650.0,2475.69517832811 -mux4,sky90,32,10,0.8744200000000006,423.360008,27.792,0.9137689000000007 -add,tsmc28,16,10390,0.09614639076034648,72.954,374.979,29.132356400384985 -mux8,sky90,64,2897,0.3510466738004832,2040.36004,651.526,417.3944951487746 -mux2,tsmc28,16,20583,0.05247078273332362,15.372,84.373,5.446467247718992 -priorityencoder,sky90,32,3322,0.29804847983142685,285.180005,67.743,27.569484384406984 -csa,tsmc28,32,30000,0.040226333333333336,201.599998,1890.0,102.09443399999999 -csa,tsmc28,32,26848,0.04022672228843861,209.663999,1960.0,94.13053015494636 -mux4,sky90,16,4719,0.25584430281839377,359.660007,120.182,89.03381738080103 -mux4,sky90,16,4571,0.26359750973528767,358.680007,100.453,95.68589603390943 -mult,tsmc28,64,2244,0.44563279857397503,13157.172078,82900.0,15068.181818181818 -mux4,tsmc28,128,14157,0.07512343427279791,336.419997,1650.0,105.54842515328106 -mult,sky90,128,2500,1.9229159999999998,304572.244252,143000.0,1569385.970484 -mux8,tsmc28,32,12670,0.08457559826361484,166.194,848.034,48.37724220678769 -mux8d,tsmc28,1,10000,0.06199500000000001,5.04,17.442,0.8629704000000001 -comparator,sky90,16,10,0.9970469999999949,252.840005,31.402,0.26022926699999865 diff --git a/synthDC/ppa/README b/synthDC/ppa/README new file mode 100644 index 000000000..2bdf2edae --- /dev/null +++ b/synthDC/ppa/README @@ -0,0 +1,32 @@ +Wally PPA Study +July 8, 2022 +Madeleine 
Masser-Frye +mmasserfrye@hmc.edu +___________________ +Apologies for any issues in this folder; the code was originally written for individual use and the documentation was compiled in haste. Please feel free to contact the author with questions. + +------------------- +ppaSynth.py + +Run to synthesize datapath modules from src/ppa. +To run a specific combination of widths, modules, techs, and freqs, +modify those lists and use allCombos() to generate synthsToRun (comment out freqSweep()). +To run a sweep of frequencies around the best delay found in existing syntheses (according to bestSynths.csv), modify the parameters and use freqSweep() to generate synthsToRun. +To remove from synthsToRun any syntheses that already exist in /runs, use filterRedundant(). +Syntheses run in parallel, but you may encounter issues when running more than a dozen or so at once. +------------------- +ppaAnalyze.py + +Run to plot results of PPA syntheses. See docstrings for individual function info. +------------------- +bestSynths.csv + +Results of the synthesis for each combination of module, width, and tech with the best achievable delay. Generated by csvOfBest() in ppaAnalyze.py. +------------------- +ppaFitting.csv & ppaEquations.csv + +Representations of the regression fit for each module and metric. Generated in ppaAnalyze.py by makeCoefTable() and makeEqTable(). +------------------- +ppaData.csv + +Results from all synthesis runs. Generated by synthsintocsv() and used by synthsfromcsv() in ppaAnalyze.py.
diff --git a/synthDC/bestSynths.csv b/synthDC/ppa/bestSynths.csv similarity index 100% rename from synthDC/bestSynths.csv rename to synthDC/ppa/bestSynths.csv diff --git a/synthDC/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py similarity index 94% rename from synthDC/ppaAnalyze.py rename to synthDC/ppa/ppaAnalyze.py index db97d0f95..2dce62ae5 100755 --- a/synthDC/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -8,9 +8,11 @@ import re from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines +import matplotlib as mpl import numpy as np from collections import namedtuple import sklearn.metrics as skm +import os def synthsfromcsv(filename): Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") @@ -245,7 +247,7 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc)) titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" ax.set_title(module + titleStr) - plt.savefig('./plots/PPA/'+ module + '_' + var + '.png') + plt.savefig('.plots/'+ module + '_' + var + '.png') # plt.show() return r2 @@ -518,8 +520,8 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False): if no freq specified, uses the synthesis with best achievable delay for each width overlays data from both techs ''' - plt.rcParams["figure.figsize"] = (7,3.46) - fig, axs = plt.subplots(2, 2) + with mpl.rc_context({"figure.figsize": (7,3.46)}): + fig, axs = plt.subplots(2, 2) arr = [['delay', 'area'], ['lpower', 'denergy']] @@ -550,11 +552,13 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False): if freq != 10: n = 'normalized' if norm else 'unnormalized' - saveStr = './plots/PPA/'+ n + '/' + mod + '.png' + saveStr = './plots/'+ n + '/' + mod + '.png' plt.savefig(saveStr) # plt.show() def makeLineLegend(): + ''' generates legend to accompany normalized plots + ''' plt.rcParams["figure.figsize"] = (5.5,0.3) fig = plt.figure() fullLeg 
= [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')] @@ -563,7 +567,7 @@ def makeLineLegend(): fullLeg += [lines.Line2D([0], [0], color='green', label='sky90', marker='o')] fullLeg += [lines.Line2D([0], [0], color='red', label='combined', marker='_')] fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc='center') - saveStr = './plots/PPA/legend.png' + saveStr = './plots/legend.png' plt.savefig(saveStr) def muxPlot(fits='clsgn', norm=True): @@ -616,9 +620,11 @@ def muxPlot(fits='clsgn', norm=True): ax.set_title('mux timing') ax.legend(handles = fullLeg) - plt.savefig('./plots/PPA/mux.png') + plt.savefig('./plots/mux.png') def stdDevError(): + ''' calculates std deviation and error for paper-writing purposes + ''' for var in ['delay', 'area', 'lpower', 'denergy']: errlist = [] for module in modules: @@ -668,6 +674,30 @@ def stdDevError(): print(var, ' ', avgErr, ' ', stdv) +def makePlotDirectory(): + ''' creates plots directory in same level as this script to store plots in + ''' + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, 'plots') + if not os.path.exists(final_directory): + os.makedirs(final_directory) + os.chdir(final_directory) + + for folder in ['freqBuckshot', 'normalized', 'unnormalized']: + new_directory = os.path.join(final_directory, folder) + if not os.path.exists(new_directory): + os.makedirs(new_directory) + os.chdir(new_directory) + if 'freq' in folder: + for tech in ['sky90', 'tsmc28']: + for mod in modules: + tech_directory = os.path.join(new_directory, tech) + mod_directory = os.path.join(tech_directory, mod) + if not os.path.exists(mod_directory): + os.makedirs(mod_directory) + os.chdir('..') + + os.chdir(current_directory) if __name__ == '__main__': ############################## @@ -686,26 +716,22 @@ if __name__ == '__main__': ############################## # cleanup() # run to remove garbage synth runs - # synthsintocsv() # slow, run only when new synth runs to add to csv + 
synthsintocsv() # slow, run only when new synth runs to add to csv allSynths = synthsfromcsv('ppaData.csv') # your csv here! bestSynths = csvOfBest('bestSynths.csv') + makePlotDirectory() - # ### function examples - # squareAreaDelay('sky90', 'add', 32) - # oneMetricPlot('mult', 'lpower') - # freqPlot('sky90', 'mux4', 16) - # plotBestAreas('add') + # ### other functions # makeCoefTable() # makeEqTable() - # makeLineLegend() # muxPlot() # stdDevError() for mod in modules: - plotPPA(mod, norm=False) - plotPPA(mod, aleOpt=True) for w in widths: freqPlot('sky90', mod, w) freqPlot('tsmc28', mod, w) + plotPPA(mod, norm=False) + plotPPA(mod, aleOpt=True) plt.close('all') \ No newline at end of file diff --git a/synthDC/ppaData.csv b/synthDC/ppa/ppaData.csv similarity index 100% rename from synthDC/ppaData.csv rename to synthDC/ppa/ppaData.csv diff --git a/synthDC/ppaEquations.csv b/synthDC/ppa/ppaEquations.csv similarity index 100% rename from synthDC/ppaEquations.csv rename to synthDC/ppa/ppaEquations.csv diff --git a/synthDC/ppaFitting.csv b/synthDC/ppa/ppaFitting.csv similarity index 100% rename from synthDC/ppaFitting.csv rename to synthDC/ppa/ppaFitting.csv diff --git a/synthDC/ppaSynth.py b/synthDC/ppa/ppaSynth.py similarity index 53% rename from synthDC/ppaSynth.py rename to synthDC/ppa/ppaSynth.py index 842140b55..cbfd52538 100755 --- a/synthDC/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -10,47 +10,64 @@ def runCommand(module, width, tech, freq): command = "make synth DESIGN=ppa_{}_{} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) subprocess.Popen(command, shell=True) -def deleteRedundant(LoT): +def deleteRedundant(synthsToRun): '''removes any previous runs for the current synthesis specifications''' synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*" - for synth in LoT: + for synth in synthsToRun: bashCommand = synthStr.format(*synth) outputCPL = subprocess.check_output(['bash','-c', bashCommand]) -if __name__ == '__main__': - - 
LoT = [] +def freqSweep(module, width, tech): synthsToRun = [] - - ##### Run specific syntheses - # widths = [8] - # modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8'] - # techs = ['sky90'] - # freqs = [5000] - # for w in widths: - # for module in modules: - # for tech in techs: - # for freq in freqs: - # LoT += [[module, str(w), tech, str(freq)]] - - ##### Run a sweep based on best delay found in existing syntheses arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] allSynths = synthsfromcsv('bestSynths.csv') for synth in allSynths: - f = 1000/synth.delay - for freq in [round(f+f*x/100) for x in arr]: - LoT += [[synth.module, str(synth.width), synth.tech, str(freq)]] - - ##### Only do syntheses for which a run doesn't already exist + if (synth.module == module) & (synth.tech == tech) & (synth.width == width): + f = 1000/synth.delay + for freq in [round(f+f*x/100) for x in arr]: + synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]] + return synthsToRun + +def filterRedundant(synthsToRun): bashCommand = "find . 
-path '*runs/ppa*rv32e*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) specReg = re.compile('[a-zA-Z0-9]+') allSynths = output.decode("utf-8").split('\n')[:-1] allSynths = [specReg.findall(oneSynth)[2:7] for oneSynth in allSynths] allSynths = [oneSynth[0:2] + [oneSynth[3][:-2]] + [oneSynth[4]] for oneSynth in allSynths] - for synth in LoT: + output = [] + for synth in synthsToRun: if (synth not in allSynths): - synthsToRun += [synth] + output += [synth] + return output + +def allCombos(widths, modules, techs, freqs): + synthsToRun = [] + for w in widths: + for module in modules: + for tech in techs: + for freq in freqs: + synthsToRun += [[module, str(w), tech, str(freq)]] + return synthsToRun + + +if __name__ == '__main__': + + ##### Run specific syntheses + widths = [8, 16, 32, 64, 128] + modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8'] + techs = ['sky90', 'tsmc28'] + freqs = [5000] + synthsToRun = allCombos(widths, modules, techs, freqs) + + ##### Run a sweep based on best delay found in existing syntheses + module = 'add' + width = 32 + tech = 'sky90' + synthsToRun = freqSweep(module, width, tech) + + ##### Only do syntheses for which a run doesn't already exist + synthsToRun = filterRedundant(synthsToRun) pool = Pool(processes=25) - pool.starmap(runCommand, synthsToRun) \ No newline at end of file + pool.starmap(print, synthsToRun) \ No newline at end of file diff --git a/synthDC/runAllSynths.sh b/synthDC/runAllSynths.sh deleted file mode 100755 index 6944552d4..000000000 --- a/synthDC/runAllSynths.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/bash - -make clean -mv runs runArchive/$(date +"%Y_%m_%d_%I_%M_%p") -mv newRuns runs -mkdir newRuns -./wallySynth.py \ No newline at end of file diff --git a/synthDC/scripts/extractSummary.py b/synthDC/scripts/extractSummary.py deleted file mode 100755 index add95068c..000000000 --- a/synthDC/scripts/extractSummary.py +++ /dev/null 
@@ -1,64 +0,0 @@ -#!/usr/bin/python3 -# Shreya Sanghai (ssanghai@hmc.edu) 2/28/2022 -import glob -import re -import csv -import linecache -import os - -# field_names = [ 'Name', 'Critical Path Length', 'Cell Area', 'Synth Time'] -# data = [] -# for name in glob.glob("/home/ssanghai/riscv-wally/synthDC/runs/*/reports/wallypipelinedcore_qor.rep"): -# f = open(name, 'r') -# # trimName = re.search("runs\/(.*?)\/reports", name).group(1) -# trimName = re.search("wallypipelinedcore_(.*?)_sky9",name).group(1) -# for line in f: -# if "Critical Path Length" in line: -# pathLen = re.search("Length: *(.*?)\\n", line).group(1) -# if "Cell Area" in line: -# area = re.search("Area: *(.*?)\\n", line).group(1) -# if "Overall Compile Time" in line: -# time = re.search("Time: *(.*?)\\n", line).group(1) -# data += [{'Name' : trimName, 'Critical Path Length': pathLen, 'Cell Area' : area, 'Synth Time' :time}] - -def main(): - data = [] - curr_dir = os.path.dirname(os.path.abspath(__file__)) - output_file = os.path.join(curr_dir,"..","Summary.csv") - runs_dir = os.path.join(curr_dir,"..","runs/*/reports/wallypipelinedcore_qor.rep") - # cruns_dir = "/home/ssanghai/Desktop/cleanRun/*/reports/wallypipelinedcore_qor.rep" - search_strings = [ - "Critical Path Length:", "Cell Area:", "Overall Compile Time:", - "Critical Path Clk Period:", "Critical Path Slack:" - ] - for name in glob.glob(runs_dir): - f = open(name, 'r') - trimName = re.search("wallypipelinedcore_(.*?)_sky",name).group(1) - - output = {'Name':trimName} - num_lines = len(f.readlines()) - curr_line_index = 0 - - while curr_line_index < num_lines: - line = linecache.getline(name, curr_line_index) - for search_string in search_strings: - if search_string in line: - val = getVal(name,search_string,line,curr_line_index) - output[search_string] = val - curr_line_index +=1 - data += [output] - - with open(output_file, 'w') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=['Name'] + search_strings) - writer.writeheader() - 
writer.writerows(data) - -def getVal(filename, search_string, line, line_index): - data = re.search(f"{search_string} *(.*?)\\n", line).group(1) - if data == '': #sometimes data is stored in two line - data = linecache.getline(filename, line_index+1).strip() - return data - -if __name__=="__main__": - main() - \ No newline at end of file diff --git a/synthDC/scripts/runConfigsSynth.sh b/synthDC/scripts/runConfigsSynth.sh deleted file mode 100755 index 84e1f6d77..000000000 --- a/synthDC/scripts/runConfigsSynth.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/bash -# rm -r runs/* -make clean -make del -make copy -make configs -make allsynth -scripts/extractSummary.py -make del \ No newline at end of file diff --git a/synthDC/scripts/runFrequencySynth.sh b/synthDC/scripts/runFrequencySynth.sh deleted file mode 100755 index 5aa998997..000000000 --- a/synthDC/scripts/runFrequencySynth.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/bash -# rm -r runs/* -make clean -make del -make freqs TECH=$1 -scripts/extractSummary.py -make del diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 4d444682a..9b72849f8 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -3,6 +3,9 @@ # james.stine@okstate.edu 27 Sep 2015 # +# start run clock +set t1 [clock seconds] + # Ignore unnecessary warnings: # intraassignment delays for nonblocking assignments are ignored suppress_message {VER-130} @@ -23,11 +26,11 @@ set saifpower $::env(SAIFPOWER) set maxopt $::env(MAXOPT) set drive $::env(DRIVE) -eval file copy -force ${cfg} {hdl/} +eval file copy -force ${cfg} {$outputDir/hdl/} eval file copy -force ${cfg} $outputDir -eval file copy -force [glob ${hdl_src}/../config/shared/*.vh] {hdl/} -eval file copy -force [glob ${hdl_src}/*/*.sv] {hdl/} -eval file copy -force [glob ${hdl_src}/*/flop/*.sv] {hdl/} +eval file copy -force [glob ${hdl_src}/../config/shared/*.vh] {$outputDir/hdl/} +eval file copy -force [glob ${hdl_src}/*/*.sv] {$outputDir/hdl/} +eval file copy 
-force [glob ${hdl_src}/*/flop/*.sv] {$outputDir/hdl/} # Only for FMA class project; comment out when done # eval file copy -force [glob ${hdl_src}/fma/fma16.v] {hdl/} @@ -38,7 +41,7 @@ if { $saifpower == 1 } { } # Verilog files -set my_verilog_files [glob hdl/*] +set my_verilog_files [glob $outputDir/hdl/*] # Set toplevel set my_toplevel $::env(DESIGN) @@ -53,7 +56,8 @@ set vhdlout_show_unconnected_pins "true" # Due to parameterized Verilog must use analyze/elaborate and not # read_verilog/vhdl (change to pull in Verilog and/or VHDL) # -define_design_lib WORK -path ./WORK +#set alib_library_analysis_path ./$outputDir +define_design_lib WORK -path ./$outputDir/WORK analyze -f sverilog -lib WORK $my_verilog_files elaborate $my_toplevel -lib WORK @@ -102,7 +106,7 @@ set_critical_range [expr $my_period*0.05] $current_design # Partitioning - flatten or hierarchically synthesize if { $maxopt == 1 } { - ungroup -all -flatten -simple_names + ungroup -all -simple_names -flatten } # Set input pins except clock @@ -131,8 +135,13 @@ if {$tech == "sky130"} { } # Set input/output delay -set_input_delay 0.0 -max -clock $my_clk $all_in_ex_clk -set_output_delay 0.0 -max -clock $my_clk [all_outputs] +if {$drive == "FLOP"} { + set_input_delay 0.1 -max -clock $my_clk $all_in_ex_clk + set_output_delay 0.1 -max -clock $my_clk [all_outputs] +} else { + set_input_delay 0.0 -max -clock $my_clk $all_in_ex_clk + set_output_delay 0.0 -max -clock $my_clk [all_outputs] +} # Setting load constraint on output ports if {$tech == "sky130"} { @@ -176,8 +185,8 @@ set_fix_multiple_port_nets -all -buffer_constants # group_path -name COMBO -from [all_inputs] -to [all_outputs] # Save Unmapped Design -#set filename [format "%s%s%s%s" $outputDir "/unmapped/" $my_toplevel ".ddc"] -#write_file -format ddc -hierarchy -o $filename +# set filename [format "%s%s%s%s" $outputDir "/unmapped/" $my_toplevel ".ddc"] +# write_file -format ddc -hierarchy -o $filename # Compile statements if { $maxopt == 1 } { @@ 
-338,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets - redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" } -redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 } +redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 } @@ -374,4 +383,9 @@ redirect $filename { report_constraint } set filename [format "%s%s%s%s" $outputDir "/reports/" $my_toplevel "_hier.rep"] # redirect $filename { report_hierarchy } +# end run clock and echo run time in minutes +set t2 [clock seconds] +set t [expr $t2 - $t1] +echo [expr $t/60] + quit diff --git a/synthDC/wallySynth.py b/synthDC/wallySynth.py index 99d70e813..0c1579813 100755 --- a/synthDC/wallySynth.py +++ b/synthDC/wallySynth.py @@ -3,31 +3,62 @@ import subprocess from multiprocessing import Pool +import argparse -def runCommand(config, tech, freq): - command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT=0 MAXCORES=1".format(config, tech, freq) +def runSynth(config, tech, freq, maxopt): + global pool + command = "make synth DESIGN=wallypipelinedcore CONFIG={} TECH={} DRIVE=FLOP FREQ={} MAXOPT={} MAXCORES=1".format(config, tech, freq, maxopt) + pool.map(mask, [command]) + +def mask(command): subprocess.Popen(command, shell=True) -testFreq = [3000, 10000] +def freshStart(): + out = subprocess.check_output(['bash','-c', 'make fresh']) + for x in out.decode("utf-8").split('\n')[:-1]: + print(x) + return + if 
__name__ == '__main__': - + techs = ['sky90', 'tsmc28'] - sweepCenter = [870, 3000] - synthsToRun = [] - - arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] - for i in [0, 1]: - tech = techs[i] - sc = sweepCenter[i] - f = testFreq[i] - for freq in [round(sc+sc*x/100) for x in arr]: # rv32e freq sweep - synthsToRun += [['rv32e', tech, freq]] - for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64i', 'rv64ic', 'rv32e']: # configs - synthsToRun += [[config, tech, f]] - for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations - config = 'rv64gc_' + mod - synthsToRun += [[config, tech, f]] + allConfigs = ['rv32gc', 'rv32ic', 'rv64gc', 'rv64ic', 'rv32e', 'rv32i', 'rv64i'] + freqVaryPct = [-20, -12, -8, -6, -4, -2, 0, 2, 4, 6, 8, 12, 20] pool = Pool() - pool.starmap(runCommand, synthsToRun) \ No newline at end of file + + parser = argparse.ArgumentParser() + + parser.add_argument("-s", "--freqsweep", type=int, help = "Synthesize wally with target frequencies at given MHz and +/- 2, 4, 6, 8 %%") + parser.add_argument("-c", "--configsweep", action='store_true', help = "Synthesize wally with configurations 32e, 32ic, 64ic, 32gc, and 64gc") + parser.add_argument("-f", "--featuresweep", action='store_true', help = "Synthesize wally with features turned off progressively to visualize critical path") + + parser.add_argument("-v", "--version", choices=allConfigs, help = "Configuration of wally") + parser.add_argument("-t", "--targetfreq", type=int, help = "Target frequncy") + parser.add_argument("-e", "--tech", choices=techs, help = "Technology") + parser.add_argument("-o", "--maxopt", action='store_true', help = "Turn on MAXOPT") + + args = parser.parse_args() + + freq = args.targetfreq if args.targetfreq else 3000 + tech = args.tech if args.tech else 'sky90' + maxopt = int(args.maxopt) + + if args.freqsweep: + sc = args.freqsweep + config = args.version if args.version else 'rv32e' + freshStart() + for freq in [round(sc+sc*x/100) for x in freqVaryPct]: # rv32e 
freq sweep + runSynth(config, tech, freq, maxopt) + if args.configsweep: + freshStart() + for config in ['rv32gc', 'rv32ic', 'rv64gc', 'rv64ic', 'rv32e']: # configs + config = config + '_orig' # until memory integrated + runSynth(config, tech, freq, maxopt) + if args.featuresweep: + freshStart() + v = args.version if args.version else 'rv64gc' + for mod in ['FPUoff', 'noMulDiv', 'noPriv', 'PMP0', 'PMP16']: # rv64gc path variations + config = v + '_' + mod + runSynth(config, tech, freq, maxopt) diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 621a5b54b..af67a5357 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch build_wally memfile +all: root build_arch #build_wally memfile root: mkdir -p $(work_dir) diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml index 5a76fd978..04a5da180 100644 --- a/tests/riscof/spike/spike_rv32imc_isa.yaml +++ b/tests/riscof/spike/spike_rv32imc_isa.yaml @@ -1,11 +1,11 @@ hart_ids: [0] hart0: - ISA: RV32IMAFCZicsr_Zifencei + ISA: RV32IMAFDCZicsr_Zifencei physical_addr_sz: 32 User_Spec_Version: '2.3' supported_xlen: [32] misa: - reset-val: 0x40001125 + reset-val: 0x4000112D rv32: accessible: true mxl: @@ -23,6 +23,6 @@ hart0: warl: dependency_fields: [] legal: - - extensions[25:0] bitmask [0x0001125, 0x0000000] + - extensions[25:0] bitmask [0x000112D, 0x0000000] wr_illegal: - Unchanged \ No newline at end of file diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/references/WALLY-fld.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/references/WALLY-fld.reference_output new file mode 100644 index 000000000..ab5658348 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/references/WALLY-fld.reference_output @@ -0,0 +1,3 @@ +00000000 +40000000 + diff --git 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/src/WALLY-fld.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/src/WALLY-fld.S new file mode 100644 index 000000000..79b1c963e --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/src/WALLY-fld.S @@ -0,0 +1,81 @@ +/////////////////////////////////////////// +// ../../wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/src/WALLY-fld.S +// David_Harris@hmc.edu & Katherine Parry +// Created 2022-07-07 16:55:21.991349// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +#include "model_test.h" +#include "arch_test.h" + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +#ifdef TEST_CASE_1 + +RVTEST_FP_ENABLE() +RVTEST_VALBASEUPD(x7,test_fp) +RVTEST_SIGBASE( x6, wally_signature) +RVTEST_ISA("RV32IFD") +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*F.*D.*);def TEST_CASE_1=True;",WALLY-fld) +inst_0: + li x1,2 + fcvt.d.w f3, x1 + la x16, rvtest_data + fsd f3,0(x16) + fld f4,0(x16) + fsd f4,0(x6) + + #endif + +.EQU NUMTESTS,3 + +RVTEST_CODE_END +RVMODEL_HALT + +RVTEST_DATA_BEGIN +.align 4 +rvtest_data: +.word 0x98765432 +.word 0x55555555 +test_fp: +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + + +wally_signature: + .fill NUMTESTS*(XLEN/32),4,0xdeadbeef + +#ifdef rvtest_mtrap_routine + +mtrap_sigptr: + .fill 64*(XLEN/32),4,0xdeadbeef + +#endif + +#ifdef rvtest_gpr_save + +gpr_save: + .fill 32*(XLEN/32),4,0xdeadbeef + +#endif + +RVMODEL_DATA_END +// ../../wally-riscv-arch-test/riscv-test-suite/rv32i_m/D/src/WALLY-fld.S +// David_Harris@hmc.edu & Katherine Parry diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 3f6dcc8e1..509d4e9e1 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -1,18 +1,16 @@ -00000000 # test reset to zero -00000000 -00000000 # output_en -00000000 # output_val -00000000 # rise_ie -00000000 # rise_ip -00000000 # fall_ie -00000000 # fall_ip -00000000 # high_ie -00000000 # high_ip -00000000 # fall_ie -ffffffff # fall_ip -00000000 # iof_en -00000000 # iof_sel -00000000 # out_xor +00000000 # reset to zero tests: input_val +00000000 # input_en +00000000 # 
output_en +00000000 # output_val +00000000 # rise_ie +00000000 # fall_ie +00000000 # low_ie +00000000 # high_ie +00000000 # rise_ip +00000000 # fall_ip +00000000 # high_ip +ffffffff # low_ip +00000000 # out_xor A5A5A5A5 # test output pins 5A5AFFFF 00000000 # test input enables @@ -32,5 +30,12 @@ A5FA0000 # high_ip 00000000 # MIP = 0 00000000 # MIP = 0 00000000 # MIP = 0 -00000800 # Test interrupts can be enabled and triggered: MEIP set +00000800 # Test interrupts can be enabled and triggered: MEIP set from high_ie 00000000 # MEIP = 0 +00000800 # MEIP set from low_ie +00000000 # MEIP = 0 +00000800 # MEIP set from rise_ie +00000000 # MEIP = 0 +00000800 # MEIP set from fall_ie +00000000 # MEIP = 0 + diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S index 7cfd83c1a..4d201fc9d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-clint-01.S @@ -91,13 +91,3 @@ test_cases: .4byte 0x0, 0x00000080, readmip_test # mtip should be set .4byte 0x0, 0x0, terminate_test # terminate tests - -# =========== Experimental mtime counting test =========== - -# .4byte mtimecmph, 0xFFFFFFFF, write32_test # make sure mtip isn't set until ready -# .4byte mtimeh, 0x0FFFFFFF, write32_test # write near max value to mtimeh -# .4byte mtime, 0x00000000, write32_test # write small value to mtime -# .4byte 0x0, 0x000000000, readmip_test # mtip should be zero -# .4byte mtimecmp, 0x00000001, write32_test # write slightly larger value than mtime to test mtime counting -# .4byte mtimecmph, 0x0FFFFFFF, write32_test # write same value as mtimeh to test mtime counting -# .4byte 0x0, 0x00000080, readmip_test # mtip should be set since it has been at least two cycles diff --git 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 4b2496a77..1b3bbdb47 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -75,15 +75,13 @@ test_cases: .4byte output_en, 0x00000000, read32_test # output_en reset to zero .4byte output_val, 0x00000000, read32_test # output_val reset to zero .4byte rise_ie, 0x00000000, read32_test # rise_ie reset to zero -.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero .4byte fall_ie, 0x00000000, read32_test # fall_ie reset to zero -.4byte fall_ip, 0xffffffff, read32_test # fall_ip reset to ones (input_val is zero) .4byte high_ie, 0x00000000, read32_test # high_ie reset to zero -.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero .4byte low_ie, 0x00000000, read32_test # low_ie reset to zero -.4byte low_ip, 0x00000000, read32_test # low_ip reset to zero -.4byte iof_en, 0x00000000, read32_test # iof_en reset to zero -.4byte iof_sel, 0x00000000, read32_test # iof_sel reset to zero +.4byte rise_ip, 0x00000000, read32_test # rise_ip reset to zero +.4byte fall_ip, 0x00000000, read32_test # fall_ip reset to zero +.4byte high_ip, 0x00000000, read32_test # high_ip reset to zero +.4byte low_ip, 0xffffffff, read32_test # low_ip reset to ones since all zeroes .4byte out_xor, 0x00000000, read32_test # out_xor reset to zero # =========== Test output and input pins =========== @@ -120,7 +118,7 @@ SETUP_PLIC .4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output -.4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
+.4byte high_ip, 0xA5FA0000, read32_test # high interrupt pending .4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) @@ -137,11 +135,23 @@ SETUP_PLIC .4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt .4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending -# =========== Test interrupts can be enabled and triggered +# =========== Test interrupts can be enabled and triggered =========== .4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised -.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17 +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released +.4byte low_ie, 0x00010000, write32_test # enable low interrupt on bit 16, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte low_ie, 0x00000000, write32_test # disable low interrupt on bit 16 +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released +.4byte rise_ie, 0x00200000, write32_test # enable rise interrupt on bit 21, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte rise_ie, 0x00000000, write32_test # disable rise interrupt on bit 21, which is pending +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released +.4byte fall_ie, 0x01000000, write32_test # enable high interrupt on bit 24, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte fall_ie, 0x00000000, write32_test # disable high interrupt on bit 24, which is pending .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, 
terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index fdfc3e6d5..319f2233b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -857,6 +857,7 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a addi a6, a6, 8 .endm +// Place this macro in peripheral tests to setup all the PLIC registers to generate external interrupts .macro SETUP_PLIC # Setup PLIC with a series of register writes