mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally
This commit is contained in:
commit
c2493168b6
4
.gitignore
vendored
4
.gitignore
vendored
@ -10,6 +10,10 @@ __pycache__/
|
||||
addins
|
||||
addins/riscv-arch-test/Makefile.include
|
||||
addins/riscv-tests/target
|
||||
addins/coremark/work/*
|
||||
addins/embench/bd_speed/*
|
||||
addins/embench/bd_size/*
|
||||
benchmarks/embench/wally*.json
|
||||
|
||||
#vsim work files to ignore
|
||||
transcript
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -17,6 +17,9 @@
|
||||
[submodule "addins/embench-iot"]
|
||||
path = addins/embench-iot
|
||||
url = https://github.com/embench/embench-iot
|
||||
[submodule "addins/coremark"]
|
||||
path = addins/coremark
|
||||
url = https://github.com/eembc/coremark
|
||||
[submodule "addins/sky130_osu_sc_t18"]
|
||||
path = addins/sky130_osu_sc_t18
|
||||
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t18
|
||||
|
||||
1
addins/coremark
Submodule
1
addins/coremark
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit f3e8f2e0941e42961aadcc52750b1b5577c157c9
|
||||
@ -1 +1 @@
|
||||
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
|
||||
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
|
||||
29
benchmarks/coremark/Makefile
Normal file
29
benchmarks/coremark/Makefile
Normal file
@ -0,0 +1,29 @@
|
||||
#cmbase=../../addins/coremark
|
||||
# Directory holding this repository's CoreMark port files.
PORT_DIR = $(CURDIR)/riscv64-baremetal
# CoreMark submodule checkout, and the directory that receives build products.
cmbase = ../../addins/coremark
work_dir = $(cmbase)/work
# Every file whose change must trigger a rebuild of the benchmark binary:
# the CoreMark core sources followed by the port files.
sources = $(addprefix $(cmbase)/,core_main.c core_list_join.c coremark.h \
		core_matrix.c core_state.c core_util.c) \
	$(addprefix $(PORT_DIR)/,core_portme.h core_portme.c core_portme.mak \
		crt.S encoding.h util.h syscalls.c)
|
||||
|
||||
# Turn the linked benchmark into a memory image and run it in simulation:
#   1. disassemble the binary (listing is written next to it),
#   2. convert it to a 64-bit-wide hex file -- this is the rule's target,
#   3. run extractFunctionRadix.sh on the disassembly,
#   4. run the rv64gc coremark batch simulation with vsim, logging to work_dir.
# Each recipe line runs in its own shell, so no `cd` back is needed afterwards.
# The log path is made absolute because the redirect is evaluated after the
# `cd`; the relative $(work_dir) would otherwise be resolved from that directory.
$(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
	riscv64-unknown-elf-objdump -D $< > $<.elf.objdump
	riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@
	extractFunctionRadix.sh $<.elf.objdump
	cd ../../pipelined/regression && vsim -c -do "do wally-pipelined-batch.do rv64gc coremark" > $(abspath $(work_dir))/coremark.sim.log
|
||||
|
||||
# Build the benchmark binary by running the `compile` goal of the CoreMark
# submodule's makefile with this directory's port, then move the result into
# work_dir.
# These flags were used by WD on CoreMark.  Two more flags from that set,
#   -fno-toplevel-reorder --param=max-inline-insns-size=128
# are left out because adding them caused a compiler error.
# $(MAKE) is used instead of a literal `make` so that -n, -j and the jobserver
# are honoured by the sub-make.
$(work_dir)/coremark.bare.riscv: $(sources) Makefile
	$(MAKE) -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
	mkdir -p $(work_dir)
	mv $(cmbase)/coremark.bare.riscv $(work_dir)
|
||||
|
||||
|
||||
# Delete everything that was placed in the work directory.
.PHONY: clean
clean:
	$(RM) $(work_dir)/*
|
||||
385
benchmarks/coremark/riscv64-baremetal/core_portme.c
Executable file
385
benchmarks/coremark/riscv64-baremetal/core_portme.c
Executable file
@ -0,0 +1,385 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "coremark.h"
|
||||
#if CALLGRIND_RUN
|
||||
#include <valgrind/callgrind.h>
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD != MEM_MALLOC)
/* Function: portable_malloc
	Stub used when MEM_METHOD is not MEM_MALLOC: always returns NULL.
*/
void *portable_malloc(size_t size) {
	return NULL;
}
/* Function: portable_free
	Stub used when MEM_METHOD is not MEM_MALLOC: releases nothing.
*/
void portable_free(void *p) {
	p = NULL;
}
#else
#include <malloc.h>
/* Function: portable_malloc
	Platform specific malloc(): forwards to the C library.
*/
void *portable_malloc(size_t size) {
	return malloc(size);
}
/* Function: portable_free
	Platform specific free(): forwards to the C library.
*/
void portable_free(void *p) {
	free(p);
}
#endif
|
||||
|
||||
#if (SEED_METHOD==SEED_VOLATILE)
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile=0x3415;
|
||||
volatile ee_s32 seed2_volatile=0x3415;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x0;
|
||||
volatile ee_s32 seed2_volatile=0x0;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x8;
|
||||
volatile ee_s32 seed2_volatile=0x8;
|
||||
volatile ee_s32 seed3_volatile=0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile=ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile=0;
|
||||
#endif
|
||||
/* Porting: Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
|
||||
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
|
||||
Sample implementation for standard time.h and windows.h definitions included.
|
||||
*/
|
||||
/* Define: TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow does not occur.
|
||||
If there are issues with the return value overflowing, increase this value.
|
||||
*/
|
||||
#if USE_CLOCK
/* Time with clock(). */
#define NSECS_PER_SEC CLOCKS_PER_SEC
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE clock_t
#define GETMYTIME(_t) (*_t=clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif defined(_MSC_VER)
/* Time with GetSystemTimeAsFileTime(). */
#define NSECS_PER_SEC 10000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE FILETIME
#define GETMYTIME(_t) GetSystemTimeAsFileTime(_t)
#define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER)
/* setting to millisecond resolution by default with MSDEV */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif HAS_TIME_H
/* Time with clock_gettime(CLOCK_REALTIME). */
#define NSECS_PER_SEC 1000000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE struct timespec
#define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t)
#define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER)
/* setting to 1/1000 of a second resolution by default with linux */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#else
// Defined for RISCV
#define NSECS_PER_SEC 1000000000 // TODO: What freq are we assuming?
#define EE_TIMER_TICKER_RATE 1000 // TODO: What is this?
#define CORETIMETYPE clock_t
/* Read a CSR by name and yield its value (GNU statement expression). */
#define read_csr(reg) ({ unsigned long __tmp; \
	asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
	__tmp; })
/* Address the timer value is read from; the elapsed value is reported as
   "MTIME" by get_time.  Override with -DMTIME_ADDR=... if the timer is
   mapped elsewhere. */
#ifndef MTIME_ADDR
#define MTIME_ADDR 0x0200BFF8
#endif
/* Unlike the variants above, this GETMYTIME takes the variable itself, not a pointer. */
#define GETMYTIME(_t) ((_t) = *(volatile unsigned long long*)MTIME_ADDR)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
#define SAMPLE_TIME_IMPLEMENTATION 1
#endif
/* Number of timer ticks that time_in_secs treats as one second. */
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
#if SAMPLE_TIME_IMPLEMENTATION
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
static unsigned long start_instr_val, stop_instr_val;
|
||||
|
||||
/* Function: minstretFunc
|
||||
This function will count the number of instructions.
|
||||
*/
|
||||
unsigned long minstretFunc(void)
|
||||
{
|
||||
unsigned long minstretRead = read_csr(minstret);
|
||||
//ee_printf("Minstret is %lu\n", minstretRead);
|
||||
return minstretRead;
|
||||
}
|
||||
|
||||
/* Function: minstretDiff
|
||||
This function will take the difference between the first and second reads from the
|
||||
MINSTRET csr to determine the number of machine instructions retired between two points
|
||||
of time
|
||||
*/
|
||||
unsigned long minstretDiff(void)
|
||||
{
|
||||
unsigned long minstretDifference = MYTIMEDIFF(stop_instr_val, start_instr_val);
|
||||
return minstretDifference;
|
||||
}
|
||||
|
||||
/* Function: start_time
|
||||
This function will be called right before starting the timed portion of the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
start_instr_val = minstretFunc();
|
||||
GETMYTIME(start_time_val);
|
||||
//ee_printf("Timer started\n");
|
||||
//ee_printf(" MTIME: %u\n", start_time_val);
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_START_INSTRUMENTATION
|
||||
#endif
|
||||
#if MICA
|
||||
asm volatile("int3");/*1 */
|
||||
#endif
|
||||
}
|
||||
/* Function: stop_time
|
||||
This function will be called right after ending the timed portion of the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or other system parameters - e.g. reading the current value of cpu cycles counter.
|
||||
*/
|
||||
void stop_time(void) {
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_STOP_INSTRUMENTATION
|
||||
#endif
|
||||
#if MICA
|
||||
asm volatile("int3");/*1 */
|
||||
#endif
|
||||
GETMYTIME(stop_time_val);
|
||||
stop_instr_val = minstretFunc();
|
||||
//ee_printf("Timer stopped\n");
|
||||
//ee_printf(" MTIME: %u\n", stop_time_val);
|
||||
}
|
||||
/* Function: get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other value,
|
||||
as long as it can be converted to seconds by <time_in_secs>.
|
||||
This methodology is taken to accomodate any hardware or simulated platform.
|
||||
The sample implementation returns millisecs by default,
|
||||
and the resolution is controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
unsigned long instructions = minstretDiff();
|
||||
ee_printf(" Called get_time\n");
|
||||
ee_printf(" Elapsed MTIME: %u\n", elapsed);
|
||||
ee_printf(" Elapsed MINSTRET: %lu\n", instructions);
|
||||
ee_printf(" CPI: %lu / %lu\n", elapsed, instructions);
|
||||
return elapsed;
|
||||
}
|
||||
/* Function: time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accomodate systems with no support for floating point.
|
||||
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
|
||||
*/
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
float retvalint = (float) retval;
|
||||
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
|
||||
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retvalint);
|
||||
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retval);
|
||||
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retval);
|
||||
return retvalint;
|
||||
}
|
||||
#else
|
||||
#error "Please implement timing functionality in core_portme.c"
|
||||
#endif /* SAMPLE_TIME_IMPLEMENTATION */
|
||||
|
||||
ee_u32 default_num_contexts = MULTITHREAD;
|
||||
|
||||
/* Function: portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
#if PRINT_ARGS
|
||||
int i;
|
||||
for (i=0; i<*argc; i++) {
|
||||
ee_printf("Arg[%d]=%s\n",i,argv[i]);
|
||||
}
|
||||
#endif
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
|
||||
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG))
|
||||
ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n");
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG)
|
||||
int nargs=*argc,i;
|
||||
if ((nargs>1) && (*argv[1]=='M')) {
|
||||
default_num_contexts=parseval(argv[1]+1);
|
||||
if (default_num_contexts>MULTITHREAD)
|
||||
default_num_contexts=MULTITHREAD;
|
||||
/* Shift args since first arg is directed to the portable part and not to coremark main */
|
||||
--nargs;
|
||||
for (i=1; i<nargs; i++)
|
||||
argv[i]=argv[i+1];
|
||||
*argc=nargs;
|
||||
}
|
||||
#endif /* sample of potential platform specific init via command line, reset the number of contexts being used if first argument is M<n>*/
|
||||
p->portable_id=1;
|
||||
}
|
||||
/* Function: portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id=0;
|
||||
}
|
||||
|
||||
#if (MULTITHREAD>1)
|
||||
|
||||
/* Function: core_start_parallel
|
||||
Start benchmarking in a parallel context.
|
||||
|
||||
Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
|
||||
Other implementations using MCAPI or other standards can easily be devised.
|
||||
*/
|
||||
/* Function: core_stop_parallel
|
||||
Stop a parallel context execution of coremark, and gather the results.
|
||||
|
||||
Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
|
||||
Other implementations using MCAPI or other standards can easily be devised.
|
||||
*/
|
||||
#if USE_PTHREAD
|
||||
/* pthread variant: run iterate(res) on a new thread.
   Returns the pthread_create result truncated to ee_u8. */
ee_u8 core_start_parallel(core_results *res) {
	int rc = pthread_create(&(res->port.thread), NULL, iterate, (void *)res);
	return (ee_u8)rc;
}
|
||||
/* pthread variant: wait for the thread started by core_start_parallel.
   Returns the pthread_join result truncated to ee_u8; the thread's own
   return value is discarded. */
ee_u8 core_stop_parallel(core_results *res) {
	void *thread_result;
	int rc = pthread_join(res->port.thread, &thread_result);
	return (ee_u8)rc;
}
|
||||
#elif USE_FORK
|
||||
static int key_id=0;
|
||||
ee_u8 core_start_parallel(core_results *res) {
|
||||
key_t key=4321+key_id;
|
||||
key_id++;
|
||||
res->port.pid=fork();
|
||||
res->port.shmid=shmget(key, 8, IPC_CREAT | 0666);
|
||||
if (res->port.shmid<0) {
|
||||
ee_printf("ERROR in shmget!\n");
|
||||
}
|
||||
if (res->port.pid==0) {
|
||||
iterate(res);
|
||||
res->port.shm=shmat(res->port.shmid, NULL, 0);
|
||||
/* copy the validation values to the shared memory area and quit*/
|
||||
if (res->port.shm == (char *) -1) {
|
||||
ee_printf("ERROR in child shmat!\n");
|
||||
} else {
|
||||
memcpy(res->port.shm,&(res->crc),8);
|
||||
shmdt(res->port.shm);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
/* fork variant: wait for the child started by core_start_parallel and copy
   its 8 bytes of results from the shared-memory segment into res->crc.

   Returns 1 on success, 0 if waiting for the child or attaching the segment
   failed.

   Once the parent is done with it, the segment is removed with IPC_RMID.
   Without that it outlives the process, and a later run using the same key
   would attach to the old segment and its old contents. */
ee_u8 core_stop_parallel(core_results *res) {
	int status;
	pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED);
	if (wpid != res->port.pid) {
		/* Save errno first: the print below may change it. */
		int wait_errno = errno;
		ee_printf("ERROR waiting for child.\n");
		if (wait_errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
		if (wait_errno == EINTR) ee_printf("errno=Interrupted\n");
		return 0;
	}
	/* after process is done, get the values from the shared memory area */
	res->port.shm=shmat(res->port.shmid, NULL, 0);
	if (res->port.shm == (char *) -1) {
		ee_printf("ERROR in parent shmat!\n");
		shmctl(res->port.shmid, IPC_RMID, NULL);
		return 0;
	}
	memcpy(&(res->crc),res->port.shm,8);
	shmdt(res->port.shm);
	shmctl(res->port.shmid, IPC_RMID, NULL);
	return 1;
}
|
||||
#elif USE_SOCKET
|
||||
static int key_id=0;
|
||||
ee_u8 core_start_parallel(core_results *res) {
|
||||
int bound, buffer_length=8;
|
||||
res->port.sa.sin_family = AF_INET;
|
||||
res->port.sa.sin_addr.s_addr = htonl(0x7F000001);
|
||||
res->port.sa.sin_port = htons(7654+key_id);
|
||||
key_id++;
|
||||
res->port.pid=fork();
|
||||
if (res->port.pid==0) { /* benchmark child */
|
||||
iterate(res);
|
||||
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
|
||||
if (-1 == res->port.sock) /* if socket failed to initialize, exit */ {
|
||||
ee_printf("Error Creating Socket");
|
||||
} else {
|
||||
int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in));
|
||||
if (bytes_sent < 0)
|
||||
ee_printf("Error sending packet: %s\n", strerror(errno));
|
||||
close(res->port.sock); /* close the socket */
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
/* parent process, open the socket */
|
||||
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
|
||||
bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr));
|
||||
if (bound < 0)
|
||||
ee_printf("bind(): %s\n",strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
/* socket variant: receive the child's 8 bytes of results into res->crc, then
   wait for the child to terminate.

   Returns 1 on success, 0 if receiving or waiting failed.

   The address length is a socklen_t, the type recvfrom() expects a pointer
   to.  The socket opened by core_start_parallel is closed as soon as the
   datagram has been read (or the read failed) so it is not leaked. */
ee_u8 core_stop_parallel(core_results *res) {
	int status;
	socklen_t fromlen=sizeof(struct sockaddr);
	int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen);
	/* Save errno first: close() and the prints below may change it. */
	int recv_errno = errno;
	pid_t wpid;

	close(res->port.sock);
	if (recsize < 0) {
		ee_printf("Error in receive: %s\n", strerror(recv_errno));
		return 0;
	}
	wpid = waitpid(res->port.pid,&status,WUNTRACED);
	if (wpid != res->port.pid) {
		int wait_errno = errno;
		ee_printf("ERROR waiting for child.\n");
		if (wait_errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
		if (wait_errno == EINTR) ee_printf("errno=Interrupted\n");
		return 0;
	}
	return 1;
}
|
||||
#else /* no standard multicore implementation */
|
||||
#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
|
||||
#endif /* multithread implementations */
|
||||
#endif
|
||||
296
benchmarks/coremark/riscv64-baremetal/core_portme.h
Executable file
296
benchmarks/coremark/riscv64-baremetal/core_portme.h
Executable file
@ -0,0 +1,296 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains configuration constants required to execute on different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration: HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#if !defined(HAS_FLOAT)
/* Default: floating point is treated as available. */
#define HAS_FLOAT 1
#endif /* HAS_FLOAT */
|
||||
/* Configuration: HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#if !defined(HAS_TIME_H)
/* Default: time.h is not used. */
#define HAS_TIME_H 0
#endif /* HAS_TIME_H */
|
||||
/* Configuration: USE_CLOCK
|
||||
Define to 1 to time the benchmark with clock(); requires the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#if !defined(USE_CLOCK)
/* Default: clock() is not used for timing. */
#define USE_CLOCK 0
#endif /* USE_CLOCK */
|
||||
/* Configuration: HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#if !defined(HAS_STDIO)
/* Default: stdio.h is treated as available. */
#define HAS_STDIO 1
#endif /* HAS_STDIO */
|
||||
/* Configuration: HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf function.
|
||||
*/
|
||||
#if !defined(HAS_PRINTF)
/* Default: printf is treated as available. */
#define HAS_PRINTF 1
#endif /* HAS_PRINTF */
|
||||
|
||||
/* Configuration: CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
#if defined(_MSC_VER)
#include <windows.h>
typedef size_t CORE_TICKS;
#elif HAS_TIME_H
#include <time.h>
typedef clock_t CORE_TICKS;
#else
/* Configuration: size_t and clock_t
	Declared here because neither header above is included in this case.
	Note these need to match the size of the clock output and the xLen the processor supports
*/
typedef unsigned long size_t;
typedef unsigned long clock_t;
typedef clock_t CORE_TICKS;
#endif
|
||||
|
||||
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#if !defined(COMPILER_VERSION) && defined(__GNUC__)
/* GNU-compatible compiler: "GCC" followed by the compiler's version string. */
#define COMPILER_VERSION "GCC"__VERSION__
#elif !defined(COMPILER_VERSION)
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
|
||||
#if !defined(COMPILER_FLAGS)
/* FLAGS_STR is supplied on the compiler command line (-DFLAGS_STR=...). */
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif /* COMPILER_FLAGS */
|
||||
#if !defined(MEM_LOCATION)
/* Placeholder text; MEM_LOCATION_UNSPEC marks that no real location was given. */
#define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)"
#define MEM_LOCATION_UNSPEC 1
#endif /* MEM_LOCATION */
|
||||
|
||||
/* Data Types:
|
||||
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important*:
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
|
||||
*/
|
||||
/* 8-bit and 16-bit types */
typedef unsigned char ee_u8;
typedef signed short ee_s16;
typedef unsigned short ee_u16;
/* 32-bit integer types */
typedef signed int ee_s32;
typedef unsigned int ee_u32;
/* floating-point type */
typedef double ee_f32;
/* integer type used to hold a pointer, and the size type */
typedef unsigned long long ee_ptr_int;
typedef size_t ee_size_t;
/* align an offset to point to a 32b value: round up to the next multiple of 4 */
#define align_mem(x) (void *)(((ee_ptr_int)(x) + 3) & ~(ee_ptr_int)3)
|
||||
|
||||
/* Configuration: SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile time.
|
||||
|
||||
Valid values:
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#if !defined(SEED_METHOD)
/* Default: seeds come from volatile variables. */
#define SEED_METHOD SEED_VOLATILE
#endif /* SEED_METHOD */
|
||||
|
||||
/* Configuration: MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values:
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#if !defined(MEM_METHOD)
/* Default: use a static memory array. */
#define MEM_METHOD MEM_STATIC
#endif /* MEM_METHOD */
|
||||
|
||||
/* Configuration: MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values:
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note:
|
||||
If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
|
||||
|
||||
Three sample implementations are provided. Use <USE_PTHREAD>, <USE_FORK> or <USE_SOCKET> to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel> and <core_stop_parallel> in <core_portme.c>,
|
||||
to fit a particular architecture.
|
||||
*/
|
||||
#if !defined(MULTITHREAD)
/* Default: a single context. */
#define MULTITHREAD 1
#endif /* MULTITHREAD */
|
||||
|
||||
/* Configuration: USE_PTHREAD
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses pthread_create and pthread_join.
|
||||
|
||||
Valid values:
|
||||
0 - Do not use pthreads API.
|
||||
1 - Use pthreads API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
|
||||
*/
|
||||
#if !defined(USE_PTHREAD)
/* Default: do not use the pthreads API. */
#define USE_PTHREAD 0
#endif /* USE_PTHREAD */
|
||||
|
||||
/* Configuration: USE_FORK
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses fork, waitpid, shmget,shmat and shmdt.
|
||||
|
||||
Valid values:
|
||||
0 - Do not use fork API.
|
||||
1 - Use fork API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
|
||||
*/
|
||||
#if !defined(USE_FORK)
/* Default: do not use the fork API. */
#define USE_FORK 0
#endif /* USE_FORK */
|
||||
|
||||
/* Configuration: USE_SOCKET
|
||||
Sample implementation for launching parallel contexts
|
||||
This implementation uses fork, socket, sendto and recvfrom
|
||||
|
||||
Valid values:
|
||||
0 - Do not use fork and sockets API.
|
||||
1 - Use fork and sockets API
|
||||
|
||||
Note:
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
|
||||
*/
|
||||
#if !defined(USE_SOCKET)
/* Default: do not use the fork and sockets API. */
#define USE_SOCKET 0
#endif /* USE_SOCKET */
|
||||
|
||||
/* Configuration: MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values:
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
*/
|
||||
#if !defined(MAIN_HAS_NOARGC)
/* Default: argc/argv to main is not supported. */
#define MAIN_HAS_NOARGC 1
#endif /* MAIN_HAS_NOARGC */
|
||||
|
||||
/* Configuration: MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values:
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#if !defined(MAIN_HAS_NORETURN)
/* Default: main returns an int. */
#define MAIN_HAS_NORETURN 0
#endif /* MAIN_HAS_NORETURN */
|
||||
|
||||
/* Variable: default_num_contexts
|
||||
Number of contexts to spawn in multicore context.
|
||||
Override this global value to change number of contexts used.
|
||||
|
||||
Note:
|
||||
This value may not be set higher than the <MULTITHREAD> define.
|
||||
|
||||
To experiment, you can set the <MULTITHREAD> define to the highest value expected, and use argc/argv in the <portable_init> to set this value from the command line.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
/* Headers and PARALLEL_METHOD name for the selected parallel implementation.
   Nothing is defined unless MULTITHREAD is greater than 1. */
#if (MULTITHREAD>1) && USE_PTHREAD
#include <pthread.h>
#define PARALLEL_METHOD "PThreads"
#elif (MULTITHREAD>1) && USE_FORK
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/shm.h>
#include <string.h> /* for memcpy */
#define PARALLEL_METHOD "Fork"
#elif (MULTITHREAD>1) && USE_SOCKET
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/wait.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#define PARALLEL_METHOD "Sockets"
#elif (MULTITHREAD>1)
#define PARALLEL_METHOD "Proprietary"
#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
#endif /* Method for multithreading, MULTITHREAD > 1 */
|
||||
|
||||
typedef struct CORE_PORTABLE_S {
|
||||
#if (MULTITHREAD>1)
|
||||
#if USE_PTHREAD
|
||||
pthread_t thread;
|
||||
#elif USE_FORK
|
||||
pid_t pid;
|
||||
int shmid;
|
||||
void *shm;
|
||||
#elif USE_SOCKET
|
||||
pid_t pid;
|
||||
int sock;
|
||||
struct sockaddr_in sa;
|
||||
#endif /* Method for multithreading */
|
||||
#endif /* MULTITHREAD>1 */
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
/* Target specific initialization; receives pointers to main's argc and argv. */
void portable_init(core_portable *p, int *argc, char *argv[]);
/* Target specific finalization. */
void portable_fini(core_portable *p);
|
||||
|
||||
#if (SEED_METHOD==SEED_VOLATILE)
/* If a run type was chosen explicitly, only record that fact.  Otherwise
   choose one: a profile run when TOTAL_DATA_SIZE is 1200, else a
   performance run. */
#if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN)
#define RUN_TYPE_FLAG 1
#elif (TOTAL_DATA_SIZE==1200)
#define PROFILE_RUN 1
#else
#define PERFORMANCE_RUN 1
#endif
#endif /* SEED_METHOD==SEED_VOLATILE */
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
||||
149
benchmarks/coremark/riscv64-baremetal/core_portme.mak
Executable file
149
benchmarks/coremark/riscv64-baremetal/core_portme.mak
Executable file
@ -0,0 +1,149 @@
|
||||
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Original Author: Shay Gal-on
|
||||
|
||||
#File: core_portme.mak
|
||||
|
||||
# Flag: RISCVTOOLS
#	Location of the RISC-V tools; taken from $(RISCV).
RISCVTOOLS = $(RISCV)
# Flag: RISCVTYPE
#	Type of toolchain to use; it is the prefix of the compiler's file name.
RISCVTYPE = riscv64-unknown-elf
# Flag: OUTFLAG
#	Use this flag to define how to get an executable (e.g -o)
OUTFLAG = -o
# Flag: CC
#	Use this flag to define compiler to use
#	david_harris@hmc.edu 20 Nov 2021 removed full path; require
#	NOTE(review): the note above is cut off, and the active assignment still
#	uses the full path; the PATH-relative form is the commented-out line.
#	Confirm which one is intended.
CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
#CC = $(RISCVTYPE)-gcc
|
||||
# Flag: CFLAGS
#	Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
#	PORT_CFLAGS are the options of this port; FLAGS_STR is the flag text that
#	is handed to the sources through -DFLAGS_STR.
#PORT_CFLAGS = -O2 -static -std=gnu99
PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
# Flag: LFLAGS_END
#	Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
#	Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END +=
# Flag: PORT_SRCS
#	Port specific source files can be added here
PORT_SRCS = $(addprefix $(PORT_DIR)/,core_portme.c syscalls.c crt.S)
|
||||
# Flag: LOAD
#	Define this flag if you need to load to a target, as in a cross compile environment.

# Flag: RUN
#	Define this flag if running does not consist of simple invocation of the binary.
#	In a cross compile environment, you need to define this.

# For flashing and using a tera term macro, you could use
#LOAD = flash ADDR
#RUN = ttpmacro coremark.ttl

# For copying to target and executing via SSH connection, you could use
#LOAD = scp $(OUTFILE) user@target:~
#RUN = ssh user@target -c

# This port: the load step only prints a message, and the run command is "spike pk".
LOAD = echo Loading done
RUN = spike pk

# Suffix of object files and suffix of the executable.
OEXT = .o
EXE = .bare.riscv
|
||||
|
||||
# Flag: SEPARATE_COMPILE
|
||||
# Define if you need to separate compilation from link stage.
|
||||
# In this case, you also need to define below how to create an object file, and how to link.
|
||||
ifdef SEPARATE_COMPILE
|
||||
|
||||
LD = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
|
||||
OBJOUT = -o
|
||||
LFLAGS =
|
||||
OFLAG = -o
|
||||
COUT = -c
|
||||
# Flag: PORT_OBJS
|
||||
# Port specific object files can be added here
|
||||
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
|
||||
PORT_CLEAN = *$(OEXT)
|
||||
|
||||
$(OPATH)%$(OEXT) : %.c
|
||||
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
|
||||
|
||||
endif
|
||||
|
||||
# Target: port_prebuild
|
||||
# Generate any files that are needed before actual build starts.
|
||||
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
|
||||
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
|
||||
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
|
||||
# Note - Using REBUILD=1
|
||||
#
|
||||
# Use make PGO=1 to invoke this sample processing.
|
||||
|
||||
ifdef PGO
|
||||
ifeq (,$(findstring $(PGO),gen))
|
||||
PGO_STAGE=build_pgo_gcc
|
||||
CFLAGS+=-fprofile-use
|
||||
endif
|
||||
PORT_CLEAN+=*.gcda *.gcno gmon.out
|
||||
endif
|
||||
|
||||
.PHONY: port_prebuild
|
||||
port_prebuild: $(PGO_STAGE)
|
||||
|
||||
.PHONY: build_pgo_gcc
|
||||
build_pgo_gcc:
|
||||
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
|
||||
|
||||
# Target: port_postbuild
|
||||
# Generate any files that are needed after actual build end.
|
||||
# E.g. change format to srec, bin, zip in order to be able to load into flash
|
||||
.PHONY: port_postbuild
|
||||
port_postbuild:
|
||||
|
||||
# Target: port_postrun
|
||||
# Do platform specific after run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_postrun
|
||||
port_postrun:
|
||||
|
||||
# Target: port_prerun
|
||||
# Do platform specific before run stuff.
|
||||
# E.g. reset the board, backup the logfiles etc.
|
||||
.PHONY: port_prerun
|
||||
port_prerun:
|
||||
|
||||
# Target: port_postload
|
||||
# Do platform specific after load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_postload
|
||||
port_postload:
|
||||
|
||||
# Target: port_preload
|
||||
# Do platform specific before load stuff.
|
||||
# E.g. reset the reset power to the flash eraser
|
||||
.PHONY: port_preload
|
||||
port_preload:
|
||||
|
||||
# FLAG: OPATH
|
||||
# Path to the output folder. Default - current folder.
|
||||
OPATH = ./
|
||||
MKDIR = mkdir -p
|
||||
|
||||
# FLAG: PERL
|
||||
# Define perl executable to calculate the geomean if running separate.
|
||||
PERL=/usr/bin/perl
|
||||
237
benchmarks/coremark/riscv64-baremetal/crt.S
Normal file
237
benchmarks/coremark/riscv64-baremetal/crt.S
Normal file
@ -0,0 +1,237 @@
|
||||
# See LICENSE for license details.
|
||||
|
||||
#include "encoding.h"
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
# define LREG ld
|
||||
# define SREG sd
|
||||
# define REGBYTES 8
|
||||
#else
|
||||
# define LREG lw
|
||||
# define SREG sw
|
||||
# define REGBYTES 4
|
||||
#endif
|
||||
|
||||
.section ".text.init"
|
||||
.globl _start
|
||||
_start:
|
||||
li x1, 0
|
||||
li x2, 0
|
||||
li x3, 0
|
||||
li x4, 0
|
||||
li x5, 0
|
||||
li x6, 0
|
||||
li x7, 0
|
||||
li x8, 0
|
||||
li x9, 0
|
||||
li x10,0
|
||||
li x11,0
|
||||
li x12,0
|
||||
li x13,0
|
||||
li x14,0
|
||||
li x15,0
|
||||
li x16,0
|
||||
li x17,0
|
||||
li x18,0
|
||||
li x19,0
|
||||
li x20,0
|
||||
li x21,0
|
||||
li x22,0
|
||||
li x23,0
|
||||
li x24,0
|
||||
li x25,0
|
||||
li x26,0
|
||||
li x27,0
|
||||
li x28,0
|
||||
li x29,0
|
||||
li x30,0
|
||||
li x31,0
|
||||
|
||||
# enable FPU and accelerator if present
|
||||
li t0, MSTATUS_FS | MSTATUS_XS
|
||||
csrs mstatus, t0
|
||||
|
||||
# make sure XLEN agrees with compilation choice
|
||||
li t0, 1
|
||||
slli t0, t0, 31
|
||||
#if __riscv_xlen == 64
|
||||
bgez t0, 1f
|
||||
#else
|
||||
bltz t0, 1f
|
||||
#endif
|
||||
2:
|
||||
li a0, 1
|
||||
sw a0, tohost, t0
|
||||
j 2b
|
||||
1:
|
||||
|
||||
#ifdef __riscv_flen
|
||||
# initialize FPU if we have one
|
||||
la t0, 1f
|
||||
csrw mtvec, t0
|
||||
|
||||
fssr x0
|
||||
fmv.s.x f0, x0
|
||||
fmv.s.x f1, x0
|
||||
fmv.s.x f2, x0
|
||||
fmv.s.x f3, x0
|
||||
fmv.s.x f4, x0
|
||||
fmv.s.x f5, x0
|
||||
fmv.s.x f6, x0
|
||||
fmv.s.x f7, x0
|
||||
fmv.s.x f8, x0
|
||||
fmv.s.x f9, x0
|
||||
fmv.s.x f10,x0
|
||||
fmv.s.x f11,x0
|
||||
fmv.s.x f12,x0
|
||||
fmv.s.x f13,x0
|
||||
fmv.s.x f14,x0
|
||||
fmv.s.x f15,x0
|
||||
fmv.s.x f16,x0
|
||||
fmv.s.x f17,x0
|
||||
fmv.s.x f18,x0
|
||||
fmv.s.x f19,x0
|
||||
fmv.s.x f20,x0
|
||||
fmv.s.x f21,x0
|
||||
fmv.s.x f22,x0
|
||||
fmv.s.x f23,x0
|
||||
fmv.s.x f24,x0
|
||||
fmv.s.x f25,x0
|
||||
fmv.s.x f26,x0
|
||||
fmv.s.x f27,x0
|
||||
fmv.s.x f28,x0
|
||||
fmv.s.x f29,x0
|
||||
fmv.s.x f30,x0
|
||||
fmv.s.x f31,x0
|
||||
1:
|
||||
#endif
|
||||
|
||||
# initialize trap vector
|
||||
la t0, trap_entry
|
||||
csrw mtvec, t0
|
||||
|
||||
# initialize global pointer
|
||||
.option push
|
||||
.option norelax
|
||||
la gp, __global_pointer$
|
||||
.option pop
|
||||
|
||||
la tp, _end + 63
|
||||
and tp, tp, -64
|
||||
|
||||
# get core id
|
||||
csrr a0, mhartid
|
||||
# for now, assume only 1 core
|
||||
li a1, 1
|
||||
1:bgeu a0, a1, 1b
|
||||
|
||||
# give each core 128KB of stack + TLS
|
||||
#define STKSHIFT 17
|
||||
sll a2, a0, STKSHIFT
|
||||
add tp, tp, a2
|
||||
add sp, a0, 1
|
||||
sll sp, sp, STKSHIFT
|
||||
add sp, sp, tp
|
||||
|
||||
j _init
|
||||
|
||||
.align 2
|
||||
trap_entry:
|
||||
addi sp, sp, -272
|
||||
|
||||
SREG x1, 1*REGBYTES(sp)
|
||||
SREG x2, 2*REGBYTES(sp)
|
||||
SREG x3, 3*REGBYTES(sp)
|
||||
SREG x4, 4*REGBYTES(sp)
|
||||
SREG x5, 5*REGBYTES(sp)
|
||||
SREG x6, 6*REGBYTES(sp)
|
||||
SREG x7, 7*REGBYTES(sp)
|
||||
SREG x8, 8*REGBYTES(sp)
|
||||
SREG x9, 9*REGBYTES(sp)
|
||||
SREG x10, 10*REGBYTES(sp)
|
||||
SREG x11, 11*REGBYTES(sp)
|
||||
SREG x12, 12*REGBYTES(sp)
|
||||
SREG x13, 13*REGBYTES(sp)
|
||||
SREG x14, 14*REGBYTES(sp)
|
||||
SREG x15, 15*REGBYTES(sp)
|
||||
SREG x16, 16*REGBYTES(sp)
|
||||
SREG x17, 17*REGBYTES(sp)
|
||||
SREG x18, 18*REGBYTES(sp)
|
||||
SREG x19, 19*REGBYTES(sp)
|
||||
SREG x20, 20*REGBYTES(sp)
|
||||
SREG x21, 21*REGBYTES(sp)
|
||||
SREG x22, 22*REGBYTES(sp)
|
||||
SREG x23, 23*REGBYTES(sp)
|
||||
SREG x24, 24*REGBYTES(sp)
|
||||
SREG x25, 25*REGBYTES(sp)
|
||||
SREG x26, 26*REGBYTES(sp)
|
||||
SREG x27, 27*REGBYTES(sp)
|
||||
SREG x28, 28*REGBYTES(sp)
|
||||
SREG x29, 29*REGBYTES(sp)
|
||||
SREG x30, 30*REGBYTES(sp)
|
||||
SREG x31, 31*REGBYTES(sp)
|
||||
|
||||
csrr a0, mcause
|
||||
csrr a1, mepc
|
||||
mv a2, sp
|
||||
jal handle_trap
|
||||
csrw mepc, a0
|
||||
|
||||
# Remain in M-mode after mret
|
||||
li t0, MSTATUS_MPP
|
||||
csrs mstatus, t0
|
||||
|
||||
LREG x1, 1*REGBYTES(sp)
|
||||
LREG x2, 2*REGBYTES(sp)
|
||||
LREG x3, 3*REGBYTES(sp)
|
||||
LREG x4, 4*REGBYTES(sp)
|
||||
LREG x5, 5*REGBYTES(sp)
|
||||
LREG x6, 6*REGBYTES(sp)
|
||||
LREG x7, 7*REGBYTES(sp)
|
||||
LREG x8, 8*REGBYTES(sp)
|
||||
LREG x9, 9*REGBYTES(sp)
|
||||
LREG x10, 10*REGBYTES(sp)
|
||||
LREG x11, 11*REGBYTES(sp)
|
||||
LREG x12, 12*REGBYTES(sp)
|
||||
LREG x13, 13*REGBYTES(sp)
|
||||
LREG x14, 14*REGBYTES(sp)
|
||||
LREG x15, 15*REGBYTES(sp)
|
||||
LREG x16, 16*REGBYTES(sp)
|
||||
LREG x17, 17*REGBYTES(sp)
|
||||
LREG x18, 18*REGBYTES(sp)
|
||||
LREG x19, 19*REGBYTES(sp)
|
||||
LREG x20, 20*REGBYTES(sp)
|
||||
LREG x21, 21*REGBYTES(sp)
|
||||
LREG x22, 22*REGBYTES(sp)
|
||||
LREG x23, 23*REGBYTES(sp)
|
||||
LREG x24, 24*REGBYTES(sp)
|
||||
LREG x25, 25*REGBYTES(sp)
|
||||
LREG x26, 26*REGBYTES(sp)
|
||||
LREG x27, 27*REGBYTES(sp)
|
||||
LREG x28, 28*REGBYTES(sp)
|
||||
LREG x29, 29*REGBYTES(sp)
|
||||
LREG x30, 30*REGBYTES(sp)
|
||||
LREG x31, 31*REGBYTES(sp)
|
||||
|
||||
addi sp, sp, 272
|
||||
mret
|
||||
|
||||
.section ".tdata.begin"
|
||||
.globl _tdata_begin
|
||||
_tdata_begin:
|
||||
|
||||
.section ".tdata.end"
|
||||
.globl _tdata_end
|
||||
_tdata_end:
|
||||
|
||||
.section ".tbss.end"
|
||||
.globl _tbss_end
|
||||
_tbss_end:
|
||||
|
||||
.section ".tohost","aw",@progbits
|
||||
.align 6
|
||||
.globl tohost
|
||||
tohost: .dword 0
|
||||
.align 6
|
||||
.globl fromhost
|
||||
fromhost: .dword 0
|
||||
1471
benchmarks/coremark/riscv64-baremetal/encoding.h
Normal file
1471
benchmarks/coremark/riscv64-baremetal/encoding.h
Normal file
File diff suppressed because it is too large
Load Diff
66
benchmarks/coremark/riscv64-baremetal/link.ld
Normal file
66
benchmarks/coremark/riscv64-baremetal/link.ld
Normal file
@ -0,0 +1,66 @@
|
||||
/*======================================================================*/
|
||||
/* Proxy kernel linker script */
|
||||
/*======================================================================*/
|
||||
/* This is the linker script used when building the proxy kernel. */
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Setup */
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
/* The OUTPUT_ARCH command specifies the machine architecture where the
|
||||
argument is one of the names used in the BFD library. More
|
||||
specifically one of the entries in bfd/cpu-mips.c */
|
||||
|
||||
OUTPUT_ARCH( "riscv" )
|
||||
ENTRY(_start)
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* Sections */
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
|
||||
/* text: test code section */
|
||||
. = 0x80000000;
|
||||
.text.init : { *(.text.init) }
|
||||
|
||||
. = ALIGN(0x1000);
|
||||
.tohost : { *(.tohost) }
|
||||
|
||||
.text : { *(.text) }
|
||||
|
||||
/* data segment */
|
||||
.data : { *(.data) }
|
||||
|
||||
.sdata : {
|
||||
__global_pointer$ = . + 0x800;
|
||||
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
|
||||
*(.sdata .sdata.* .gnu.linkonce.s.*)
|
||||
}
|
||||
|
||||
/* bss segment */
|
||||
.sbss : {
|
||||
*(.sbss .sbss.* .gnu.linkonce.sb.*)
|
||||
*(.scommon)
|
||||
}
|
||||
.bss : { *(.bss) }
|
||||
|
||||
/* thread-local data segment */
|
||||
.tdata :
|
||||
{
|
||||
_tls_data = .;
|
||||
*(.tdata.begin)
|
||||
*(.tdata)
|
||||
*(.tdata.end)
|
||||
}
|
||||
.tbss :
|
||||
{
|
||||
*(.tbss)
|
||||
*(.tbss.end)
|
||||
}
|
||||
|
||||
/* End of uninitialized data segment */
|
||||
_end = .;
|
||||
}
|
||||
|
||||
1072
benchmarks/coremark/riscv64-baremetal/syscallbackup.c
Normal file
1072
benchmarks/coremark/riscv64-baremetal/syscallbackup.c
Normal file
File diff suppressed because it is too large
Load Diff
540
benchmarks/coremark/riscv64-baremetal/syscalls.c
Normal file
540
benchmarks/coremark/riscv64-baremetal/syscalls.c
Normal file
@ -0,0 +1,540 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <limits.h>
|
||||
#include <sys/signal.h>
|
||||
#include "util.h"
|
||||
#include "coremark.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#define SYS_write 64
|
||||
|
||||
#undef strcmp
|
||||
|
||||
extern volatile uint64_t tohost;
|
||||
extern volatile uint64_t fromhost;
|
||||
|
||||
|
||||
/*
 * Transmit one character through the memory-mapped serial port.
 *
 * Busy-waits until bit 5 of the status byte at 0x10000005 is set, stores the
 * character to 0x10000000, then waits for bit 5 to be set again before
 * returning.  NOTE(review): the addresses look like a 16550-style UART
 * (transmit holding / line status registers) -- confirm against the SoC map.
 */
void _send_char(char c) {
  volatile unsigned char *const tx_data = (unsigned char *)0x10000000;
  volatile unsigned char *const line_status = (unsigned char *)0x10000005;
  const unsigned char tx_ready = 0x20; /* bit 5 */

  while ((*line_status & tx_ready) == 0) {
    /* spin until the transmitter reports ready */
  }
  *tx_data = c;
  while ((*line_status & tx_ready) == 0) {
    /* spin until the character has been accepted */
  }
}
|
||||
|
||||
/*
 * Send a NUL-terminated string one character at a time with _send_char().
 * Returns the number of characters sent (the terminator is not sent).
 */
int sendstring(const char *p){
  int sent;

  for (sent = 0; p[sent] != '\0'; sent++)
    _send_char(p[sent]);

  return sent;
}
|
||||
|
||||
static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2)
|
||||
{
|
||||
volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
|
||||
magic_mem[0] = which;
|
||||
magic_mem[1] = arg0;
|
||||
magic_mem[2] = arg1;
|
||||
magic_mem[3] = arg2;
|
||||
__sync_synchronize();
|
||||
|
||||
tohost = (uintptr_t)magic_mem;
|
||||
while (fromhost == 0)
|
||||
;
|
||||
fromhost = 0;
|
||||
|
||||
__sync_synchronize();
|
||||
return magic_mem[0];
|
||||
}
|
||||
|
||||
/*
 * Number of entries in the performance-counter tables below.
 *
 * setStats() fills twelve consecutive slots (0..11); thirteen entries also
 * leave room for indexing by mhpmcounter number (3..12).  With the previous
 * value of 3, setStats() spun forever on its fourth sample and any access to
 * slot 3 or above ran past the end of the arrays.
 */
#define NUM_COUNTERS 13

/* Sampled counter values (raw or delta, see setStats()). */
static uintptr_t counters[NUM_COUNTERS];

/* CSR name for each slot, filled in by setStats(0). */
static char* counter_names[NUM_COUNTERS];
|
||||
|
||||
void setStats(int enable)
|
||||
{
|
||||
int i = 0;
|
||||
#define READ_CTR(name) do { \
|
||||
while (i >= NUM_COUNTERS) ; \
|
||||
uintptr_t csr = read_csr(name); \
|
||||
if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
|
||||
counters[i++] = csr; \
|
||||
} while (0)
|
||||
|
||||
READ_CTR(mcycle);
|
||||
READ_CTR(minstret);
|
||||
READ_CTR(mhpmcounter3);
|
||||
READ_CTR(mhpmcounter4);
|
||||
READ_CTR(mhpmcounter5);
|
||||
READ_CTR(mhpmcounter6);
|
||||
READ_CTR(mhpmcounter7);
|
||||
READ_CTR(mhpmcounter8);
|
||||
READ_CTR(mhpmcounter9);
|
||||
READ_CTR(mhpmcounter10);
|
||||
READ_CTR(mhpmcounter11);
|
||||
READ_CTR(mhpmcounter12);
|
||||
|
||||
#undef READ_CTR
|
||||
}
|
||||
|
||||
void __attribute__((noreturn)) tohost_exit(uintptr_t code)
|
||||
{
|
||||
tohost = (code << 1) | 1;
|
||||
asm ("ecall");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
 * Default (weak) trap handler: ignores its arguments and terminates the run
 * through tohost_exit() with code 1337.  Programs may supply their own
 * handle_trap() to override it.
 */
uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
{
  (void)cause;
  (void)epc;
  (void)regs;
  tohost_exit(1337);
}
|
||||
|
||||
/* Bare-metal exit(): forwards the status code to tohost_exit(). */
void exit(int code)
{
  const int status = code;

  tohost_exit(status);
}
|
||||
|
||||
/* Bare-metal abort(): exits with status 128 + SIGABRT. */
void abort()
{
  const int status = 128 + SIGABRT;

  exit(status);
}
|
||||
|
||||
void printstr(const char* s)
|
||||
{
|
||||
syscall(SYS_write, 1, (uintptr_t)s, strlen(s));
|
||||
}
|
||||
|
||||
/*
 * Default (weak) per-core entry hook.  Multi-threaded programs override it;
 * this version lets only core 0 return and parks every other core forever.
 *
 *   cid - id of the calling core
 *   nc  - core count (unused here)
 */
void __attribute__((weak)) thread_entry(int cid, int nc)
{
  (void)nc;
  while (cid != 0) {
    /* non-zero cores never leave this loop */
  }
}
|
||||
|
||||
/*
 * Placeholder (weak) main(): prints a reminder and returns -1.  Real
 * programs provide their own main(), which replaces this one at link time.
 */
int __attribute__((weak)) main(int argc, char** argv)
{
  const char *reminder = "Implement main(), foo!\n";

  (void)argc;
  (void)argv;
  printstr(reminder);
  return -1;
}
|
||||
|
||||
static void init_tls()
|
||||
{
|
||||
register void* thread_pointer asm("tp");
|
||||
extern char _tls_data;
|
||||
extern __thread char _tdata_begin, _tdata_end, _tbss_end;
|
||||
size_t tdata_size = &_tdata_end - &_tdata_begin;
|
||||
memcpy(thread_pointer, &_tls_data, tdata_size);
|
||||
size_t tbss_size = &_tbss_end - &_tdata_end;
|
||||
memset(thread_pointer + tdata_size, 0, tbss_size);
|
||||
}
|
||||
|
||||
void _init(int cid, int nc)
|
||||
{
|
||||
init_tls();
|
||||
thread_entry(cid, nc);
|
||||
|
||||
// only single-threaded programs should ever get here.
|
||||
int ret = main(0, 0);
|
||||
|
||||
char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
|
||||
char* pbuf = buf;
|
||||
for (int i = 0; i < NUM_COUNTERS; i++)
|
||||
if (counters[i])
|
||||
pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]);
|
||||
if (pbuf != buf)
|
||||
printstr(buf);
|
||||
counters[3] = read_csr(mhpmcounter3) - counters[3];
|
||||
counters[4] = read_csr(mhpmcounter4) - counters[4];
|
||||
counters[5] = read_csr(mhpmcounter5) - counters[5];
|
||||
counters[6] = read_csr(mhpmcounter6) - counters[6];
|
||||
counters[7] = read_csr(mhpmcounter7) - counters[7];
|
||||
counters[8] = read_csr(mhpmcounter8) - counters[8];
|
||||
counters[9] = read_csr(mhpmcounter9) - counters[9];
|
||||
counters[10] = read_csr(mhpmcounter10) - counters[10];
|
||||
counters[11] = read_csr(mhpmcounter11) - counters[11];
|
||||
counters[12] = read_csr(mhpmcounter12) - counters[12];
|
||||
|
||||
ee_printf("Load Stalls %d\n", counters[3]);
|
||||
ee_printf("D-Cache Accesses %d\n", counters[11]);
|
||||
ee_printf("D-Cache Misses %d\n", counters[12]);
|
||||
ee_printf("Branches %d\n", counters[5]);
|
||||
ee_printf("Branches Miss Predictions %d\n", counters[4]);
|
||||
ee_printf("BTB Misses %d\n", counters[6]);
|
||||
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
|
||||
ee_printf("RAS Wrong %d\n", counters[8]);
|
||||
ee_printf("Returns %d\n", counters[9]);
|
||||
ee_printf("BP Class Wrong %d\n", counters[10]);
|
||||
ee_printf("Done printing performance counters\n");
|
||||
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
#undef putchar
|
||||
/*
 * Bare-metal putchar(): sends the character straight to the serial port via
 * _send_char() and always returns 0.  (An earlier line-buffered variant that
 * went through syscall(SYS_write, ...) is no longer used.)
 */
int putchar(int ch)
{
  _send_char(ch);
  return 0;
}
|
||||
|
||||
/*
 * Print a 64-bit value as exactly 16 lower-case hexadecimal digits
 * (zero padded, no prefix) using printstr().
 */
void printhex(uint64_t x)
{
  static const char hex_digits[] = "0123456789abcdef";
  char text[17];

  /* fill from the least significant nibble backwards */
  for (int pos = 15; pos >= 0; pos--) {
    text[pos] = hex_digits[x & 0xF];
    x >>= 4;
  }
  text[16] = '\0';

  printstr(text);
}
|
||||
|
||||
/*
 * Emit an unsigned number through `putch`.
 *
 *   putch/putdat - output callback and its opaque cookie
 *   num          - value to print
 *   base         - radix; digits >= 10 are printed as 'a', 'b', ...
 *   width        - minimum field width (values <= digit count add no padding)
 *   padc         - character used for left padding
 */
static inline void printnum(void (*putch)(int, void**), void **putdat,
                    unsigned long long num, unsigned base, int width, int padc)
{
  unsigned digits[sizeof(num)*CHAR_BIT];
  int count = 0;

  /* collect digits, least significant first */
  digits[count++] = num % base;
  while (num >= base) {
    num /= base;
    digits[count++] = num % base;
  }

  /* left padding up to the requested width */
  for (int col = width; col > count; col--)
    putch(padc, putdat);

  /* digits, most significant first */
  for (int idx = count - 1; idx >= 0; idx--) {
    const unsigned d = digits[idx];
    putch(d + (d >= 10 ? 'a' - 10 : '0'), putdat);
  }
}
|
||||
|
||||
/*
 * Fetch the next unsigned argument from *ap, sized by the number of 'l'
 * flags seen: 0 -> unsigned int, 1 -> unsigned long, 2 or more ->
 * unsigned long long.
 */
static unsigned long long getuint(va_list *ap, int lflag)
{
  unsigned long long value;

  if (lflag == 0)
    value = va_arg(*ap, unsigned int);
  else if (lflag < 2)
    value = va_arg(*ap, unsigned long);
  else
    value = va_arg(*ap, unsigned long long);

  return value;
}
|
||||
|
||||
/*
 * Fetch the next signed argument from *ap, sized by the number of 'l' flags
 * seen: 0 -> int, 1 -> long, 2 or more -> long long.
 */
static long long getint(va_list *ap, int lflag)
{
  long long value;

  if (lflag == 0)
    value = va_arg(*ap, int);
  else if (lflag < 2)
    value = va_arg(*ap, long);
  else
    value = va_arg(*ap, long long);

  return value;
}
|
||||
|
||||
/*
 * Minimal printf engine: walks `fmt`, sending literal characters and
 * formatted arguments to `putch(ch, putdat)`.
 *
 * Supported conversions: %c %s %d %u %o %x %X %p %%.
 * Supported modifiers:   '-' and '0' (select the pad character), a decimal
 *                        or '*' width, '.' precision (used by %s), and
 *                        'l' / 'll' length flags.  '#' is accepted but has
 *                        no effect.
 * An unrecognised conversion prints the '%' and then the following
 * characters literally.
 */
static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
  register const char* p;
  const char* last_fmt;
  register int ch;
  unsigned long long num;
  int base, lflag, width, precision;
  char padc;

  while (1) {
    // copy literal text up to the next '%'
    while ((ch = *(unsigned char *) fmt) != '%') {
      if (ch == '\0')
        return;
      fmt++;
      putch(ch, putdat);
    }
    fmt++;

    // Process a %-escape sequence
    last_fmt = fmt;
    padc = ' ';
    width = -1;
    precision = -1;
    lflag = 0;
  reswitch:
    switch (ch = *(unsigned char *) fmt++) {

    // flag to pad on the right
    case '-':
      padc = '-';
      goto reswitch;

    // flag to pad with 0's instead of spaces
    case '0':
      padc = '0';
      goto reswitch;

    // width field
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      for (precision = 0; ; ++fmt) {
        precision = precision * 10 + ch - '0';
        ch = *fmt;
        if (ch < '0' || ch > '9')
          break;
      }
      goto process_precision;

    case '*':
      precision = va_arg(ap, int);
      goto process_precision;

    case '.':
      if (width < 0)
        width = 0;
      goto reswitch;

    // alternate-form flag: parsed and ignored
    case '#':
      goto reswitch;

    // a number seen before any '.' is the width, otherwise the precision
    process_precision:
      if (width < 0)
        width = precision, precision = -1;
      goto reswitch;

    // long flag (doubled for long long)
    case 'l':
      lflag++;
      goto reswitch;

    // character
    case 'c':
      putch(va_arg(ap, int), putdat);
      break;

    // string
    case 's':
      if ((p = va_arg(ap, char *)) == NULL)
        p = "(null)";
      if (width > 0 && padc != '-')
        for (width -= strnlen(p, precision); width > 0; width--)
          putch(padc, putdat);
      for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
        putch(ch, putdat);
        p++;
      }
      for (; width > 0; width--)
        putch(' ', putdat);
      break;

    // (signed) decimal
    case 'd':
      num = getint(&ap, lflag);
      if ((long long) num < 0) {
        putch('-', putdat);
        num = -(long long) num;
      }
      base = 10;
      goto signed_number;

    // unsigned decimal
    case 'u':
      base = 10;
      goto unsigned_number;

    // (unsigned) octal
    case 'o':
      // should do something with padding so it's always 3 octits
      base = 8;
      goto unsigned_number;

    // pointer
    case 'p':
      static_assert(sizeof(long) == sizeof(void*));
      lflag = 1;
      putch('0', putdat);
      putch('x', putdat);
      /* fall through to 'x' */

    // (unsigned) hexadecimal
    case 'X':
    case 'x':
      base = 16;
    unsigned_number:
      num = getuint(&ap, lflag);
    signed_number:
      printnum(putch, putdat, num, base, width, padc);
      break;

    // escaped '%' character
    case '%':
      putch(ch, putdat);
      break;

    // unrecognized escape sequence - just print it literally
    default:
      putch('%', putdat);
      fmt = last_fmt;
      break;
    }
  }
}
|
||||
|
||||
// Adapter giving putchar() the callback signature vprintfmt() expects, so
// putchar() is never called through an incompatible function-pointer type.
static void printf_putch(int ch, void** data)
{
  (void)data;
  putchar(ch);
}

/*
 * Bare-metal printf(): formats with vprintfmt() and emits every character
 * through putchar().  Always returns 0 rather than the number of characters
 * written.
 */
int printf(const char* fmt, ...)
{
  va_list ap;
  va_start(ap, fmt);

  vprintfmt(printf_putch, 0, fmt, ap);

  va_end(ap);
  return 0; // incorrect return value, but who cares, anyway?
}
|
||||
|
||||
/*
 * Bare-metal puts(): prints `s` followed by a newline.  The string is passed
 * as a "%s" argument, never as the format itself, so '%' characters in `s`
 * are printed literally.  Always returns 0.
 */
int puts(const char* s)
{
  printf("%s\n", s);
  return 0; // incorrect return value, but who cares, anyway?
}
|
||||
|
||||
// Output callback for sprintf(): `data` points at the caller's write cursor;
// store the character there and advance the cursor.
static void sprintf_putch(int ch, void** data)
{
  char** pstr = (char**)data;
  **pstr = ch;
  (*pstr)++;
}

/*
 * Bare-metal sprintf(): formats into `str` with vprintfmt(), appends the
 * terminating NUL and returns the number of characters written (excluding
 * the NUL).  No bounds checking is performed on `str`.
 */
int sprintf(char* str, const char* fmt, ...)
{
  va_list ap;
  char* str0 = str;
  va_start(ap, fmt);

  vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
  *str = 0;

  va_end(ap);
  return str - str0;
}
|
||||
|
||||
/*
 * Copy `len` bytes from `src` to `dest` and return `dest`.
 * Copies a machine word at a time when both pointers and the length are
 * multiples of sizeof(uintptr_t); otherwise copies byte by byte.
 */
void* memcpy(void* dest, const void* src, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;
  const int word_aligned =
      ((((uintptr_t)dest | (uintptr_t)src | len) & word_mask) == 0);

  if (word_aligned) {
    const uintptr_t* in = src;
    uintptr_t* out = dest;
    for (size_t left = len / sizeof(uintptr_t); left != 0; left--)
      *out++ = *in++;
  } else {
    const char* in = src;
    char* out = dest;
    for (size_t left = len; left != 0; left--)
      *out++ = *in++;
  }
  return dest;
}
|
||||
/*
 * Fill `len` bytes at `dest` with the low 8 bits of `byte`; returns `dest`.
 * Stores whole machine words when the pointer and length are multiples of
 * sizeof(uintptr_t); otherwise stores byte by byte.
 */
void* memset(void* dest, int byte, size_t len)
{
  const uintptr_t word_mask = sizeof(uintptr_t) - 1;

  if ((((uintptr_t)dest | len) & word_mask) == 0) {
    /* 0x0101...01 times the byte replicates it into every byte lane */
    const uintptr_t pattern = ((uintptr_t)-1 / 0xFF) * (uintptr_t)(byte & 0xFF);
    uintptr_t* out = dest;
    for (size_t left = len / sizeof(uintptr_t); left != 0; left--)
      *out++ = pattern;
  } else {
    char* out = dest;
    for (size_t left = len; left != 0; left--)
      *out++ = byte;
  }
  return dest;
}
|
||||
|
||||
/* Return the number of characters before the terminating NUL of `s`. */
size_t strlen(const char *s)
{
  size_t count = 0;

  while (s[count] != '\0')
    count++;
  return count;
}
|
||||
|
||||
/*
 * Return the length of `s`, examining at most `n` characters: the result is
 * min(strlen(s), n) and no byte past s[n-1] is read.
 */
size_t strnlen(const char *s, size_t n)
{
  size_t count;

  for (count = 0; count < n && s[count] != '\0'; count++)
    ;
  return count;
}
|
||||
|
||||
/*
 * Compare two NUL-terminated strings as unsigned bytes.  Returns the
 * difference between the first pair of differing bytes (0 when equal).
 */
int strcmp(const char* s1, const char* s2)
{
  const unsigned char* a = (const unsigned char*)s1;
  const unsigned char* b = (const unsigned char*)s2;

  while (*a != 0 && *a == *b) {
    a++;
    b++;
  }
  return *a - *b;
}
|
||||
|
||||
/*
 * Copy `src`, including its terminating NUL, into `dest`; returns `dest`.
 * The caller must provide a large enough destination buffer.
 */
char* strcpy(char* dest, const char* src)
{
  size_t i = 0;

  for (;;) {
    const char c = src[i];
    dest[i] = c;
    if (c == '\0')
      break;
    i++;
  }
  return dest;
}
|
||||
|
||||
/*
 * Convert the leading decimal number in `str` to a long.
 *
 * Skips leading spaces, accepts one optional '+' or '-', then consumes
 * decimal digits and stops at the first character that is not a digit
 * (previously every remaining character was folded into the result, so
 * input such as "12abc" produced a meaningless value).  Returns 0 when no
 * digits are present.  Overflow is not detected.
 */
long atol(const char* str)
{
  long res = 0;
  int sign = 0;

  while (*str == ' ')
    str++;

  if (*str == '-' || *str == '+') {
    sign = *str == '-';
    str++;
  }

  while (*str >= '0' && *str <= '9') {
    res *= 10;
    res += *str++ - '0';
  }

  return sign ? -res : res;
}
|
||||
90
benchmarks/coremark/riscv64-baremetal/util.h
Normal file
90
benchmarks/coremark/riscv64-baremetal/util.h
Normal file
@ -0,0 +1,90 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef __UTIL_H
|
||||
#define __UTIL_H
|
||||
|
||||
extern void setStats(int enable);
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
|
||||
|
||||
/*
 * Compare `n` ints in `test` against the reference array `verify`.
 * Returns 0 when all match, otherwise the 1-based position of the first
 * mismatch.  Elements are read two at a time; an odd final element is
 * checked separately.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  const int paired = n/2*2;
  int idx = 0;

  while (idx < paired)
  {
    const int got_lo = test[idx], got_hi = test[idx+1];
    const int want_lo = verify[idx], want_hi = verify[idx+1];
    if (got_lo != want_lo) return idx+1;
    if (got_hi != want_hi) return idx+2;
    idx += 2;
  }

  if (n % 2 == 0)
    return 0;
  return (test[n-1] != verify[n-1]) ? n : 0;
}
|
||||
|
||||
/*
 * Compare `n` doubles in `test` against the reference array `verify` using
 * exact equality.  Returns 0 when all match, otherwise the 1-based position
 * of the first mismatch.  Elements are read two at a time; an odd final
 * element is checked separately.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  const int paired = n/2*2;
  int idx = 0;

  while (idx < paired)
  {
    const double got_lo = test[idx], got_hi = test[idx+1];
    const double want_lo = verify[idx], want_hi = verify[idx+1];
    const int lo_ok = got_lo == want_lo, hi_ok = got_hi == want_hi;
    if (!lo_ok) return idx+1;
    if (!hi_ok) return idx+2;
    idx += 2;
  }

  if (n % 2 == 0)
    return 0;
  return (test[n-1] != verify[n-1]) ? n : 0;
}
|
||||
|
||||
/*
 * Spin barrier for `ncores` participants.
 *
 * Each caller flips its thread-local sense flag and atomically increments
 * the shared arrival count.  The caller that observes ncores-1 earlier
 * arrivals resets the count and publishes the new sense; every other caller
 * spins until the shared sense equals its own.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  const int arrived_before = __sync_fetch_and_add(&count, 1);
  if (arrived_before == ncores-1)
  {
    /* last one in: reset for the next round and release the others */
    count = 0;
    sense = threadsense;
  }
  else
  {
    while (sense != threadsense)
    {
      /* wait for the last arrival */
    }
  }

  __sync_synchronize();
}
|
||||
|
||||
/*
 * Advance a shift register one step: the new value is x shifted right by
 * one, with (bit0 XOR bit1) of the old value inserted at bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  const uint64_t shifted = x >> 1;
  const uint64_t feedback = (x ^ shifted) & 1;

  return (feedback << 62) | shifted;
}
|
||||
|
||||
/*
 * Return the length in bytes (2 or 4) of the RISC-V instruction at `pc`.
 *
 * Only instructions whose two lowest bits are both 1 are 32 bits wide; the
 * other three encodings (00, 01, 10) are 16-bit compressed instructions.
 * The previous test `(x & 3) ? 4 : 2` reported 4 for the 01 and 10
 * compressed quadrants.  Encodings longer than 32 bits are not handled.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  const unsigned short low_half = *(unsigned short*)pc;

  return ((low_half & 3) == 3) ? 4 : 2;
}
|
||||
|
||||
#ifdef __riscv
|
||||
#include "encoding.h"
|
||||
#endif
|
||||
|
||||
#define stringify_1(s) #s
|
||||
#define stringify(s) stringify_1(s)
|
||||
#define stats(code, iter) do { \
|
||||
unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
|
||||
code; \
|
||||
_c += read_csr(mcycle), _i += read_csr(minstret); \
|
||||
if (cid == 0) \
|
||||
printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
|
||||
stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
|
||||
} while(0)
|
||||
|
||||
#endif //__UTIL_H
|
||||
@ -2,6 +2,8 @@
|
||||
# Expanded and developed by Daniel Torres dtorres@hmc.edu
|
||||
# Compile Embench for Wally
|
||||
|
||||
embench_dir = ../../addins/embench-iot
|
||||
|
||||
all: sim size
|
||||
|
||||
allClean: clean all
|
||||
@ -10,12 +12,12 @@ build: buildspeed buildsize
|
||||
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
|
||||
buildspeed:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
|
||||
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||
$(embench_dir)/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
|
||||
find $(embench_dir)/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_size/ optimized for size
|
||||
buildsize:
|
||||
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
$(embench_dir)/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
|
||||
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
|
||||
sim: modelsim_build_memfile modelsim_run speed
|
||||
@ -27,36 +29,36 @@ modelsim_run:
|
||||
|
||||
# builds the objdump based on the compiled c elf files
|
||||
objdump: buildspeed
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
|
||||
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
|
||||
|
||||
# build memfiles, objdump.lab and objdump.addr files
|
||||
modelsim_build_memfile: objdump
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
|
||||
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
|
||||
find $(embench_dir)/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
|
||||
|
||||
# builds the tests for speed, runs them on spike and then launches python script to present results
|
||||
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
|
||||
# you'll need to manually remove one of the two .output files, or run make clean
|
||||
spike: buildspeed spikecmd speed
|
||||
spike: buildspeed objdump spike_run speed
|
||||
|
||||
# command to run spike on all of the benchmarks
|
||||
spike_run: buildspeed
|
||||
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
|
||||
spike_run:
|
||||
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
|
||||
|
||||
# python wrapper to present results of embench size benchmark
|
||||
size: buildsize
|
||||
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
|
||||
$(embench_dir)/benchmark_size.py --builddir=bd_size --json-output > wallySize.json
|
||||
|
||||
# python wrapper to present results of embench speed benchmark
|
||||
speed:
|
||||
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
|
||||
$(embench_dir)/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeed.json
|
||||
|
||||
# deletes all files
|
||||
clean:
|
||||
rm -rf ../../addins/embench-iot/bd_speed/
|
||||
rm -rf ../../addins/embench-iot/bd_size/
|
||||
rm -rf $(embench_dir)/bd_speed/
|
||||
rm -rf $(embench_dir)/bd_size/
|
||||
|
||||
allclean: clean
|
||||
rm -rf ../../addins/embench-iot/logs/
|
||||
rm -rf $(embench_dir)/logs/
|
||||
|
||||
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c
|
||||
81
benchmarks/graphGen.py
Executable file
81
benchmarks/graphGen.py
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
import plotly.graph_objects as go
|
||||
|
||||
coremarkData = {}
|
||||
embenchData = {}
|
||||
debug = True
|
||||
|
||||
# Fill the module-level coremarkData dict from the CoreMark simulation log.
# For every keyword in keywordlist a shell pipeline extracts one token from the
# log, and that token is stored under the keyword after conversion with int().
# The log path is relative, so it is resolved against the current working directory.
def loadCoremark():
|
||||
"""loads the coremark data dictionary"""
|
||||
coremarkPath = "riscv-coremark/work/coremark.sim.log"
|
||||
|
||||
keywordlist = ["CoreMark 1.0", "CoreMark Size", "MTIME", "MINSTRET", "Branches Miss Predictions", "BTB Misses"]
|
||||
for keyword in keywordlist:
|
||||
# Pipeline: keep log lines containing the keyword, take the second ':'-delimited
# field, then the second space-delimited field of that, and keep only the last line.
bashInst = "cat " + coremarkPath + " | grep \"" + keyword + "\" | cut -d \':\' -f 2 | cut -d \" \" -f 2 | tail -1"
|
||||
result = subprocess.run(bashInst, stdout=subprocess.PIPE, shell=True)
|
||||
if (debug): print(result)
|
||||
# int() raises ValueError when the pipeline printed nothing (keyword or log
# missing) or printed a token that is not an integer literal.
coremarkData[keyword] = int(result.stdout)
|
||||
if (debug): print(coremarkData)
|
||||
# Same dict object as the module-level coremarkData (mutated in place above).
return coremarkData
|
||||
|
||||
def loadEmbench():
    """Load the Embench speed results.

    Reads ``embench/wallySpeed.json`` (resolved against the current working
    directory) and returns the parsed JSON object.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    embenchPath = "embench/wallySpeed.json"
    # The context manager closes the file even when json.load raises; the
    # original left the handle open until garbage collection.
    with open(embenchPath) as f:
        embenchData = json.load(f)
    if (debug): print(embenchData)
    return embenchData
|
||||
|
||||
def graphEmbench(embenchData):
    """Display a horizontal bar chart of the Embench speed results.

    One bar is drawn per entry of
    ``embenchData["speed results"]["detailed speed results"]``, followed by
    three bars for the geometric mean, sd and range stored directly under
    ``embenchData["speed results"]``.
    """
    speedResults = embenchData["speed results"]
    perBenchmark = speedResults["detailed speed results"]
    summaryNames = ["speed geometric mean", "speed geometric sd", "speed geometric range"]

    # Bar labels (y axis) and bar lengths (x axis), in matching order.
    labels = [*perBenchmark.keys(), *summaryNames]
    lengths = [*perBenchmark.values(), *[speedResults[name] for name in summaryNames]]

    bars = go.Bar(y=labels, x=lengths, orientation='h')
    go.Figure(bars).show()
|
||||
|
||||
|
||||
def main():
|
||||
coremarkData = loadCoremark()
|
||||
embenchData = loadEmbench()
|
||||
graphEmbench(embenchData)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
# x =
|
||||
# y =
|
||||
|
||||
# df = px.data.tips()
|
||||
# fig = px.bar(df, x="total_bill", y="day", orientation='h')
|
||||
# fig.show()
|
||||
# import plotly.express as px
|
||||
|
||||
|
||||
# result = sp.run(['ls', '-l'], stdout=sp.PIPE)
|
||||
# result.stdout
|
||||
|
||||
# fig = go.Figure( go.Bar(
|
||||
# x=[],
|
||||
# y=[],
|
||||
# color="species",
|
||||
# facet_col="species",
|
||||
# title="Using update_traces() With Plotly Express Figures"),
|
||||
# orientation='h')
|
||||
|
||||
# fig.show()
|
||||
|
||||
#
|
||||
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log
|
||||
# "ls -Art ../addins/embench-iot/logs/*size* | tail -n 1 " # gets most recent embench size log
|
||||
|
||||
## get coremark score
|
||||
|
||||
# cat coremarkPath | grep "CoreMark 1.0" | cut -d ':' -f 2 | cut -d " " -f 2
|
||||
# cat coremarkPath | grep "MTIME" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
|
||||
# cat coremarkPath | grep "MINSTRET" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
|
||||
@ -166,17 +166,17 @@ void _init(int cid, int nc)
|
||||
counters[11] = read_csr(mhpmcounter11) - counters[11];
|
||||
counters[12] = read_csr(mhpmcounter12) - counters[12];
|
||||
|
||||
ee_printf("Load Stalls %d\n", counters[3]);
|
||||
ee_printf("D-Cache Accesses %d\n", counters[11]);
|
||||
ee_printf("D-Cache Misses %d\n", counters[12]);
|
||||
ee_printf("Branches %d\n", counters[5]);
|
||||
ee_printf("Branches Miss Predictions %d\n", counters[4]);
|
||||
ee_printf("BTB Misses %d\n", counters[6]);
|
||||
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
|
||||
ee_printf("RAS Wrong %d\n", counters[8]);
|
||||
ee_printf("Returns %d\n", counters[9]);
|
||||
ee_printf("BP Class Wrong %d\n", counters[10]);
|
||||
ee_printf("Done printing performance counters\n");
|
||||
ee_printf("Load Stalls : %d\n", counters[3]);
|
||||
ee_printf("D-Cache Accesses : %d\n", counters[11]);
|
||||
ee_printf("D-Cache Misses : %d\n", counters[12]);
|
||||
ee_printf("Branches : %d\n", counters[5]);
|
||||
ee_printf("Branches Miss Predictions : %d\n", counters[4]);
|
||||
ee_printf("BTB Misses : %d\n", counters[6]);
|
||||
ee_printf("Jump, JAL, JALR : %d\n", counters[7]);
|
||||
ee_printf("RAS Wrong : %d\n", counters[8]);
|
||||
ee_printf("Returns : %d\n", counters[9]);
|
||||
ee_printf("BP Class Wrong : %d\n", counters[10]);
|
||||
ee_printf("Done printing performance counters : \n");
|
||||
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
23
examples/verilog/fma/Makefile
Normal file
23
examples/verilog/fma/Makefile
Normal file
@ -0,0 +1,23 @@
|
||||
# Makefile
#
# Builds one executable per C source file in this directory (e.g. the
# fma16_testgen test-vector generator), linked against SoftFloat.
# Requires RISCV to point at the directory that contains riscv-isa-sim.

CC     = gcc
CFLAGS = -O3
LFLAGS = -L.

# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior.
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
# LIBS used to be assigned twice, so the earlier "-lm" was silently discarded.
# Both libraries are now listed once; -lm goes last so libm can satisfy
# references coming from the archive.
LIBS   = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
# Alternative: the stock SoftFloat-3e build from addins.
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a

# Expand the glob once at parse time instead of on every reference.
SRCS  := $(wildcard *.c)
PROGS := $(patsubst %.c,%,$(SRCS))

# all/clean are commands, not files; keep a stray file of that name from
# masking them.
.PHONY: all clean

all: $(PROGS)

# Compile and link each program directly from its single source file.
%: %.c
	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean:
	rm -f $(PROGS)
|
||||
4225
examples/verilog/fma/baby_torture.tv
Normal file
4225
examples/verilog/fma/baby_torture.tv
Normal file
File diff suppressed because it is too large
Load Diff
1057
examples/verilog/fma/baby_torture_rz.tv
Normal file
1057
examples/verilog/fma/baby_torture_rz.tv
Normal file
File diff suppressed because it is too large
Load Diff
23
examples/verilog/fma/fma.do
Normal file
23
examples/verilog/fma/fma.do
Normal file
@ -0,0 +1,23 @@
|
||||
# fma.do
|
||||
#
|
||||
# run with vsim -do "do fma.do"
|
||||
# add -c before -do for batch simulation
|
||||
|
||||
onbreak {resume}
|
||||
|
||||
# create library
|
||||
vlib worklib
|
||||
|
||||
vlog -lint -sv -work worklib fma16.v testbench.v
|
||||
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
|
||||
vsim -lib worklib testbenchopt
|
||||
|
||||
add wave sim:/testbench_fma16/clk
|
||||
add wave sim:/testbench_fma16/reset
|
||||
add wave sim:/testbench_fma16/x
|
||||
add wave sim:/testbench_fma16/y
|
||||
add wave sim:/testbench_fma16/z
|
||||
add wave sim:/testbench_fma16/result
|
||||
add wave sim:/testbench_fma16/rexpected
|
||||
|
||||
run -all
|
||||
268
examples/verilog/fma/fma16.v
Normal file
268
examples/verilog/fma/fma16.v
Normal file
@ -0,0 +1,268 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
`define FFLEN 16
|
||||
`define Nf 10
|
||||
`define Ne 5
|
||||
`define BIAS 15
|
||||
`define EMIN (-(2**(`Ne-1)-1))
|
||||
`define EMAX (2**(`Ne-1)-1)
|
||||
|
||||
`define NaN 16'h7E00
|
||||
`define INF 15'h7C00
|
||||
|
||||
// rounding modes *** update
|
||||
`define RZ 3'b00
|
||||
`define RNE 3'b01
|
||||
`define RM 3'b10
|
||||
`define RP 3'b11
|
||||
|
||||
// fma16: top level of the 16-bit floating-point multiply-add unit.
// Instantiates unpack16 (field extraction and zero/inf/NaN flags), mult16
// (product significand, exponent, sign), add16 (alignment and addition of z)
// and postproc16 (special cases and packing of the result).
// NOTE(review): negr is referenced only by the commented-out signadj16 line,
// so it is currently unused in this module.
// NOTE(review): unpack16 drives zs1, but add16 and postproc16 read zs; with
// signadj16 commented out, zs has no driver here. Confirm the intended wiring.
module fma16(
|
||||
input logic [`FFLEN-1:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rm, 11: rp (matches the `RM/`RP defines and fma16_testgen.c)
|
||||
output logic [`FFLEN-1:0] result);
|
||||
|
||||
logic [`Nf:0] xm, ym, zm; // U1.Nf
|
||||
logic [`Ne-1:0] xe, ye, ze; // B_Ne
|
||||
logic xs, ys, zs;
|
||||
logic zs1; // sign before optional negation
|
||||
logic [2*`Nf+1:0] pm; // U2.2Nf
|
||||
logic [`Ne:0] pe; // B_Ne+1
|
||||
logic ps; // sign of product
|
||||
logic [22:0] rm;
|
||||
logic [`Ne+1:0] re;
|
||||
logic rs;
|
||||
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
|
||||
logic [`Ne+1:0] re2;
|
||||
|
||||
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
|
||||
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
|
||||
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
|
||||
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
|
||||
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
|
||||
endmodule
|
||||
|
||||
// mult16: product stage.
// With mul = 1, pm is the product of the two significands and pe is the sum of
// the biased exponents minus one bias. With mul = 0, x is passed through:
// pm is xm shifted left by 10 bits (xm times 1.0 in the U2.2Nf format) and pe
// is xe zero-extended. ps is xs ^ ys in both cases.
module mult16(
|
||||
input logic mul,
|
||||
input logic [`Nf:0] xm, ym,
|
||||
input logic [`Ne-1:0] xe, ye,
|
||||
input logic xs, ys,
|
||||
output logic [2*`Nf+1:0] pm,
|
||||
output logic [`Ne:0] pe,
|
||||
output logic ps);
|
||||
|
||||
// only multiply if mul = 1
|
||||
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantissas
|
||||
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
|
||||
assign ps = xs ^ ys; // negative if X xor Y are negative
|
||||
endmodule
|
||||
|
||||
// add16: aligns the addend z with the product p, adds or subtracts the
// significands, and computes normalization shift counts and exponents.
//
// Outputs driven here: re (exponent chosen by the alignment mux), re2
// (exponent after the normalization mux) and rs (result sign).
// NOTE(review): rm is never driven in this module (its only assignment is in
// the commented-out pass-through block at the bottom), and the `add` input,
// zsticky, rnormed and rnormed2 are not yet consumed. Work in progress.
module add16(
  input  logic             add,
  input  logic [2*`Nf+1:0] pm,   // U2.2Nf
  input  logic [`Nf:0]     zm,   // U1.Nf
  input  logic [`Ne:0]     pe,   // B_Ne+1
  input  logic [`Ne-1:0]   ze,   // B_Ne
  input  logic             ps, zs,
  input  logic             negz,
  output logic [22:0]      rm,
  output logic [`Ne+1:0]   re,   // B_Ne+2
  output logic [`Ne+1:0]   re2,
  output logic             rs);

  logic [`Nf*3+7:0]    paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
  logic signed [`Ne:0] ExpDiff;      // Q(Ne+2).0
  logic [`Ne:0]        AlignCnt;     // U(Ne+3) bits to right shift Z for alignment *** check size.
  logic [`Nf-1:0]      prezsticky;
  logic                zsticky;
  logic                effectivesub;
  logic                rs0;
  logic [`Ne:0]        leadingzeros, NormCnt; // *** should parameterize size
  logic [`Ne:0]        re1;

  // Alignment shift
  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
  assign ExpDiff = pe - {1'b0, ze};               // Compute exponent difference as signed number
  always_comb // AlignCount mux; see Muller page 254
    if      (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;          re = {1'b0, pe}; end
    else if (ExpDiff <= 2)            begin AlignCnt = `Nf + 4 - ExpDiff;  re = {1'b0, pe}; end
    else if (ExpDiff <= `Nf+3)        begin AlignCnt = `Nf + 4 - ExpDiff;  re = {2'b0, ze}; end
    else                              begin AlignCnt = 0;                  re = {2'b0, ze}; end
  // Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; // Right shift
  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1

  // Effective subtraction
  assign effectivesub = ps ^ zs ^ negz;                          // subtract |z| from |p|
  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned;   // invert zaligned for subtraction

  // Adder
  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
  assign rs0 = r[`Nf*3+7];     // sign of the initial result
  assign r2 = rs0 ? ~r+1 : r;  // invert sum if negative; could optimize with end-around carry?

  // Sign Logic
  assign rs = ps ^ rs0; // flip the sign if necessary

  // Leading zero counter
  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits

  // Normalization shift
  // NOTE(review): re1 is unsigned while `EMIN is a negative constant, so the
  // comparison and the `EMIN assignment below may not behave as a signed
  // range check. Left unchanged; confirm the intended arithmetic.
  always_comb // NormCount mux
    if (ExpDiff < 3) begin
      if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
      else              begin NormCnt = `Nf + 5 + pe - `EMIN;   re2 = `EMIN;       end
    end else begin
      // This branch used to assign `re`, which gave `re` a second procedural
      // driver (it is already fully driven by the AlignCount mux above, with
      // this same value whenever ExpDiff >= 3) and left re2 unassigned on this
      // path, inferring a latch. Assigning re2 fixes both.
      NormCnt = AlignCnt;
      re2 = {2'b00, ze};
    end
  assign rnormed = r2 << NormCnt; // *** update sticky
  /* temporarily comment out to start synth

  // One-bit secondary normalization
  if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
  else begin // *** handle sticky
  if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
  else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
  else begin rnormed2 = rnormed << 1; re2 = re-1; end
  end

  // round
  assign l = rnormed2[***]; // least significant bit
  assign r = rnormed2[***-1]; // rounding bit
  assign s = ***; // sticky bit
  always_comb
  case (roundmode)
  RZ: roundup = 0;
  RP: roundup = ~rs & (r | s);
  RM: roundup = rs & (r | s);
  RNE: roundup = r & (s | l);
  default: roundup = 0;
  endcase
  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
  */

  // *** need to handle rounding to MAXNUM vs. INFINITY

  // add or pass product through
  /* assign rm = add ? arm : {1'b0, pm};
  assign re = add ? are : {1'b0, pe};
  assign rs = add ? ars : ps; */
endmodule
|
||||
|
||||
module lzc(
|
||||
input logic [`Nf*3+7:0] r2,
|
||||
output logic [`Ne:0] leadingzeros
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module postproc16(
|
||||
input logic [1:0] roundmode,
|
||||
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
|
||||
input logic [22:0] rm,
|
||||
input logic [`Nf:0] zm, // U1.Nf
|
||||
input logic [6:0] re,
|
||||
input logic [`Ne-1:0] ze, // B_Ne
|
||||
input logic rs, zs, ps,
|
||||
input logic [`Ne+1:0] re2,
|
||||
output logic [15:0] result);
|
||||
|
||||
logic [9:0] uf, uff;
|
||||
logic [6:0] ue;
|
||||
logic [6:0] ueb, uebiased;
|
||||
logic invalid;
|
||||
|
||||
// Special cases
|
||||
// *** not handling signaling NaN
|
||||
// *** also add overflow/underflow/inexact
|
||||
always_comb begin
|
||||
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
|
||||
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
|
||||
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
|
||||
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
|
||||
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
|
||||
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
|
||||
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
|
||||
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
|
||||
end
|
||||
|
||||
always_comb
|
||||
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
|
||||
ue = re + 7'b1;
|
||||
uf = rm[20:11];
|
||||
end else begin // no normalization shift needed
|
||||
ue = re;
|
||||
uf = rm[19:10];
|
||||
end
|
||||
|
||||
// overflow
|
||||
always_comb begin
|
||||
ueb = ue-7'd15;
|
||||
if (ue >= 7'd46) begin // overflow
|
||||
/* uebiased = 7'd30;
|
||||
uff = 10'h3ff; */
|
||||
end else begin
|
||||
uebiased = ue-7'd15;
|
||||
uff = uf;
|
||||
end
|
||||
end
|
||||
|
||||
assign result = {rs, uebiased[4:0], uff};
|
||||
|
||||
// add special case handling for zeros, NaN, Infinity
|
||||
endmodule
|
||||
|
||||
module signadj16(
|
||||
input logic negr, negz,
|
||||
input logic xs, ys, zs1,
|
||||
output logic ps, zs);
|
||||
|
||||
assign ps = xs ^ ys; // sign of product
|
||||
assign zs = zs1 ^ negz; // sign of addend
|
||||
endmodule
|
||||
|
||||
module unpack16(
|
||||
input logic [15:0] x, y, z,
|
||||
output logic [10:0] xm, ym, zm,
|
||||
output logic [4:0] xe, ye, ze,
|
||||
output logic xs, ys, zs,
|
||||
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
|
||||
|
||||
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
|
||||
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
|
||||
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
|
||||
endmodule
|
||||
|
||||
module unpacknum16(
|
||||
input logic [15:0] num,
|
||||
output logic [10:0] m,
|
||||
output logic [4:0] e,
|
||||
output logic s,
|
||||
output logic zero, inf, nan);
|
||||
|
||||
logic [9:0] f; // fraction without leading 1
|
||||
logic [4:0] eb; // biased exponent
|
||||
|
||||
assign {s, eb, f} = num; // pull bit fields out of floating-point number
|
||||
assign m = {1'b1, f}; // prepend leading 1 to fraction
|
||||
assign e = eb; // leave bias in exponent ***
|
||||
assign zero = (e == 0 && f == 0);
|
||||
assign inf = (e == 31 && f == 0);
|
||||
assign nan = (e == 31 && f != 0);
|
||||
endmodule
|
||||
|
||||
|
||||
24
examples/verilog/fma/fma16_template.v
Normal file
24
examples/verilog/fma/fma16_template.v
Normal file
@ -0,0 +1,24 @@
|
||||
// fma16.sv
|
||||
// David_Harris@hmc.edu 26 February 2022
|
||||
// 16-bit floating-point multiply-accumulate
|
||||
|
||||
// Operation: general purpose multiply, add, fma, with optional negation
|
||||
// If mul=1, p = x * y. Else p = x.
|
||||
// If add=1, result = p + z. Else result = p.
|
||||
// If negr or negz = 1, negate result or z to handle negations and subtractions
|
||||
// fadd: mul = 0, add = 1, negr = negz = 0
|
||||
// fsub: mul = 0, add = 1, negr = 0, negz = 1
|
||||
// fmul: mul = 1, add = 0, negr = 0, negz = 0
|
||||
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
|
||||
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
|
||||
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
|
||||
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
|
||||
|
||||
module fma16(
|
||||
input logic [15:0] x, y, z,
|
||||
input logic mul, add, negr, negz,
|
||||
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
|
||||
output logic [15:0] result);
|
||||
|
||||
endmodule
|
||||
|
||||
240
examples/verilog/fma/fma16_testgen.c
Normal file
240
examples/verilog/fma/fma16_testgen.c
Normal file
@ -0,0 +1,240 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "softfloat.h"
|
||||
#include "softfloat_types.h"
|
||||
|
||||
typedef union sp {
|
||||
float32_t v;
|
||||
float f;
|
||||
} sp;
|
||||
|
||||
// lists of tests, terminated with 0x8000
|
||||
uint16_t easyExponents[] = {15, 0x8000};
|
||||
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
|
||||
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
|
||||
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
|
||||
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
|
||||
uint16_t zeros[] = {0x0000, 0x8000};
|
||||
uint16_t infs[] = {0x7C00, 0xFC00};
|
||||
uint16_t nans[] = {0x7D00, 0x7D01};
|
||||
|
||||
void softfloatInit(void) {
|
||||
softfloat_roundingMode = softfloat_round_minMag;
|
||||
softfloat_exceptionFlags = 0;
|
||||
softfloat_detectTininess = softfloat_tininess_beforeRounding;
|
||||
}
|
||||
|
||||
float convFloat(float16_t f16) {
|
||||
float32_t f32;
|
||||
float res;
|
||||
sp r;
|
||||
|
||||
f32 = f16_to_f32(f16);
|
||||
r.v = f32;
|
||||
res = r.f;
|
||||
return res;
|
||||
}
|
||||
|
||||
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
float16_t result;
|
||||
int op, flagVals;
|
||||
char calc[80], flags[80];
|
||||
float32_t x32, y32, z32, r32;
|
||||
float xf, yf, zf, rf;
|
||||
float16_t smallest;
|
||||
|
||||
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
|
||||
if (!add) z.v = 0x0000; // force z to 0 to avoid add
|
||||
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
|
||||
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
|
||||
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
|
||||
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
|
||||
softfloat_exceptionFlags = 0; // clear exceptions
|
||||
result = f16_mulAdd(x, y, z);
|
||||
|
||||
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
|
||||
(softfloat_exceptionFlags >> 4) % 2,
|
||||
(softfloat_exceptionFlags >> 2) % 2,
|
||||
(softfloat_exceptionFlags >> 1) % 2,
|
||||
(softfloat_exceptionFlags) % 2);
|
||||
// pack these four flags into one nibble, discarding DZ flag
|
||||
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
|
||||
|
||||
|
||||
// convert to floats for printing
|
||||
xf = convFloat(x);
|
||||
yf = convFloat(y);
|
||||
zf = convFloat(z);
|
||||
rf = convFloat(result);
|
||||
if (mul)
|
||||
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
|
||||
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
|
||||
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
|
||||
|
||||
// omit denorms, which aren't required for this project
|
||||
smallest.v = 0x0400;
|
||||
float16_t resultmag = result;
|
||||
resultmag.v &= 0x7FFF; // take absolute value
|
||||
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
|
||||
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
|
||||
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
|
||||
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
|
||||
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
|
||||
}
|
||||
|
||||
// Write the description line to the test-vector file and build the list of
// operand magnitudes to test.
//   e, f      lists of biased exponents and fractions, each terminated by 0x8000
//   testName  unused here; kept for the callers' common signature
//   desc      comment line written verbatim at the top of the file
//   cases     output array; receives one entry per (exponent, fraction) pair
//   fptr      open output file
//   numCases  output: number of entries written to cases
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
               FILE *fptr, int *numCases) {
  int i, j;

  // desc is data, not a format string: print it through "%s" so a '%' in a
  // description cannot be interpreted as a conversion specifier.
  fprintf(fptr, "%s\n", desc);
  *numCases = 0;
  for (i=0; e[i] != 0x8000; i++)
    for (j=0; f[j] != 0x8000; j++) {
      cases[*numCases].v = f[j] | e[i]<<10; // pack exponent above the 10 fraction bits; sign bit left 0
      *numCases = *numCases + 1;
    }
}
|
||||
|
||||
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
z.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
y.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
y.v = 0x0000;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
z.v = cases[j].v;
|
||||
for (k=0; k<=sgn; k++) {
|
||||
z.v ^= (k<<15);
|
||||
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, l, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (l=0; l<=sgn; l++) {
|
||||
z.v ^= (l<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
|
||||
int i, j, k, sx, sy, sz, numCases;
|
||||
float16_t x, y, z;
|
||||
float16_t cases[100000];
|
||||
FILE *fptr;
|
||||
char fn[80];
|
||||
|
||||
sprintf(fn, "work/%s.tv", testName);
|
||||
fptr = fopen(fn, "w");
|
||||
prepTests(e, f, testName, desc, cases, fptr, &numCases);
|
||||
cases[numCases].v = 0x0000; // add +0 case
|
||||
cases[numCases+1].v = 0x8000; // add -0 case
|
||||
numCases += 2;
|
||||
for (i=0; i < numCases; i++) {
|
||||
x.v = cases[i].v;
|
||||
for (j=0; j<numCases; j++) {
|
||||
y.v = cases[j].v;
|
||||
for (k=0; k<numCases; k++) {
|
||||
z.v = cases[k].v;
|
||||
for (sx=0; sx<=sgn; sx++) {
|
||||
x.v ^= (sx<<15);
|
||||
for (sy=0; sy<=sgn; sy++) {
|
||||
y.v ^= (sy<<15);
|
||||
for (sz=0; sz<=sgn; sz++) {
|
||||
z.v ^= (sz<<15);
|
||||
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
softfloatInit(); // configure softfloat modes
|
||||
|
||||
// Test cases: multiplication
|
||||
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: addition
|
||||
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: FMA
|
||||
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
|
||||
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
|
||||
|
||||
// Test cases: Zero, Infinity, NaN
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
|
||||
|
||||
// Full test cases with other rounding modes
|
||||
softfloat_roundingMode = softfloat_round_near_even;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_min;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
|
||||
softfloat_roundingMode = softfloat_round_max;
|
||||
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
8
examples/verilog/fma/lint-fma
Executable file
8
examples/verilog/fma/lint-fma
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
# check for warnings in Verilog code
|
||||
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
|
||||
export PATH=$PATH:/usr/local/bin/
|
||||
verilator=`which verilator`
|
||||
|
||||
basepath=$(dirname $0)/..
|
||||
$verilator --lint-only --top-module fma16 fma16.v
|
||||
2
examples/verilog/fma/sim-fma
Executable file
2
examples/verilog/fma/sim-fma
Executable file
@ -0,0 +1,2 @@
|
||||
vsim -do "do fma.do"
|
||||
|
||||
1
examples/verilog/fma/sim-fma-batch
Executable file
1
examples/verilog/fma/sim-fma-batch
Executable file
@ -0,0 +1 @@
|
||||
vsim -c -do "do fma.do"
|
||||
1
examples/verilog/fma/synth
Executable file
1
examples/verilog/fma/synth
Executable file
@ -0,0 +1 @@
|
||||
make -C ../../../synthDC synth DESIGN=fma16
|
||||
52
examples/verilog/fma/testbench.v
Normal file
52
examples/verilog/fma/testbench.v
Normal file
@ -0,0 +1,52 @@
|
||||
/* verilator lint_off STMTDLY */
|
||||
module testbench_fma16;
|
||||
reg clk, reset;
|
||||
reg [15:0] x, y, z, rexpected;
|
||||
wire [15:0] result;
|
||||
reg [7:0] ctrl;
|
||||
reg [3:0] flagsexpected;
|
||||
reg mul, add, negp, negz;
|
||||
reg [1:0] roundmode;
|
||||
reg [31:0] vectornum, errors;
|
||||
reg [75:0] testvectors[10000:0];
|
||||
|
||||
// instantiate device under test
|
||||
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
|
||||
|
||||
// generate clock
|
||||
always
|
||||
begin
|
||||
clk = 1; #5; clk = 0; #5;
|
||||
end
|
||||
|
||||
// at start of test, load vectors and pulse reset
|
||||
initial
|
||||
begin
|
||||
$readmemh("work/fmul_0.tv", testvectors);
|
||||
vectornum = 0; errors = 0;
|
||||
reset = 1; #22; reset = 0;
|
||||
end
|
||||
|
||||
// apply test vectors on rising edge of clk
|
||||
always @(posedge clk)
|
||||
begin
|
||||
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
|
||||
{roundmode, mul, add, negp, negz} = ctrl[5:0];
|
||||
end
|
||||
|
||||
// check results on falling edge of clk
|
||||
always @(negedge clk)
|
||||
if (~reset) begin // skip during reset
|
||||
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
|
||||
$display("Error: inputs %h * %h + %h", x, y, z);
|
||||
$display(" result = %h (%h expected)", result, rexpected);
|
||||
errors = errors + 1;
|
||||
end
|
||||
vectornum = vectornum + 1;
|
||||
if (testvectors[vectornum] === 'x) begin
|
||||
$display("%d tests completed with %d errors",
|
||||
vectornum, errors);
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
86257
examples/verilog/fma/torture.tv
Normal file
86257
examples/verilog/fma/torture.tv
Normal file
File diff suppressed because it is too large
Load Diff
130
examples/verilog/fma/torturegen.pl
Executable file
130
examples/verilog/fma/torturegen.pl
Executable file
@ -0,0 +1,130 @@
|
||||
#!/usr/bin/perl -w
|
||||
# torturegen.pl
|
||||
# David_Harris@hmc.edu 19 April 2022
|
||||
# Convert TestFloat cases into format for fma16 project torture test
|
||||
# Strip out cases involving denorms
|
||||
|
||||
use strict;

# Names of every TestFloat input file: one per (operation, rounding mode) pair.
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
    foreach my $mode (@roundingmodes) {
        push(@names, "f16_${name}_$mode.tv");
    }
}

# Set to 1 to sample the inputs far more sparsely (much smaller torture file).
my $babyTorture = 0;

# Three-argument open with lexical handles; report the OS error on failure.
open(my $torture, '>', "work/torture.tv") || die("Can't write torture.tv: $!");
my $datestring = localtime();
print {$torture} "// Torture tests generated $datestring by $0\n";

foreach my $tv (@names) {
    open(my $tvfile, '<', "work/$tv") || die("Can't read $tv: $!");
    my $type = &getType($tv); # is it mul, add, mulAdd
    my $rm = &getRm($tv); # rounding mode
    # if ($rm != 0) { next; } # only do rz
    print {$torture} "\n////////// Testcases from $tv of type $type rounding mode $rm\n";
    print("\n////////// Testcases from $tv of type $type rounding mode $rm\n");

    # Keep only one input line out of every $density (too many tests to use).
    # The value depends only on the file type, so compute it once per file.
    my $density;
    if ($babyTorture) { $density = ($type eq "mulAdd") ? 50000 : 100; }
    else              { $density = ($type eq "mulAdd") ? 500 : 10; }

    # Control byte: rounding mode in bits 5:4, mul select in bit 3,
    # add select in bit 2. Also constant for the whole file.
    my $op = $rm << 4;
    if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
    if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
    my $opname = sprintf("%02x", $op);

    my $linecount = 0;
    while (my $line = <$tvfile>) {
        $linecount++;
        # Offsetting by the rounding mode makes each mode sample different lines.
        if ((($linecount + $rm) % $density) != 0) { next; }
        chomp($line); # strip off newline
        my @parts = split(/_/, $line);

        # Map the TestFloat fields onto x * y + z = w for this operation.
        my ($x, $y, $z, $w, $flags);
        $x = $parts[0];
        if ($type eq "add") {
            ($y, $z, $w, $flags) = ("0000", $parts[1], $parts[2], $parts[3]);
        } elsif ($type eq "mul") {
            ($y, $z, $w, $flags) = ($parts[1], "3CFF", $parts[2], $parts[3]);
        } else {
            ($y, $z, $w, $flags) = ($parts[1], $parts[2], $parts[3], $parts[4]);
        }
        $flags = substr($flags, -1); # take last character
        if (&fpval($w) eq "NaN") { $w = "7e00"; } # replace any NaN result with 7e00

        my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
        my $skip = "";
        if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
            $skip = "Skipped denorm";
        }
        my $summary = &summary($x, $y, $z, $w, $type);

        # Skipped cases are still recorded, but only as a comment line.
        if ($skip ne "") {
            print {$torture} "// $skip $tv line $linecount $line $summary\n";
        } else {
            print {$torture} "$vec // $tv line $linecount $line $summary\n";
        }
    }
    close($tvfile);
}
# A failed close on the output means buffered data was not written.
close($torture) || die("Can't finish writing torture.tv: $!");
|
||||
|
||||
# fpval(HEX)
# Return a human-readable description of the half-precision (16-bit) value
# given as a hex string: "NaN", "INF", 0, "Denorm", or
# "1.<10 fraction bits> x 2^<unbiased exponent>". Everything except NaN is
# prefixed with "-" when the sign bit is set.
sub fpval {
    my $val = shift;
    $val = hex($val); # convert hex string to number
    my $frac = $val & 0x3FF;        # 10 fraction bits
    my $exp = ($val >> 10) & 0x1F;  # 5 exponent bits
    my $sign = ($val >> 15) & 1;    # sign bit only

    my $res;
    if ($exp == 31 && $frac != 0) { return "NaN"; }
    elsif ($exp == 31) { $res = "INF"; }
    # Test exponent and fraction, not the whole word, so that negative zero
    # (0x8000) is reported as -0 rather than as a denorm.
    elsif ($exp == 0 && $frac == 0) { $res = 0; }
    elsif ($exp == 0) { $res = "Denorm"; }
    # The fraction has exactly 10 bits; print exactly 10 binary digits.
    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }

    if ($sign == 1) { $res = "-$res"; }
    return $res;
}
|
||||
|
||||
# summary(X, Y, Z, W, TYPE)
# Build the readable equation appended to each torture line. The four hex
# operands are described with fpval; TYPE ("add", "mul", anything else is
# treated as multiply-add) selects which operands appear.
sub summary {
    my ($x, $y, $z, $w, $type) = @_;
    my ($xv, $yv, $zv, $wv) = map { &fpval($_) } ($x, $y, $z, $w);

    return "$xv + $zv = $wv" if $type eq "add";
    return "$xv * $yv = $wv" if $type eq "mul";
    return "$xv * $yv + $zv = $wv";
}
|
||||
|
||||
# getType(FILENAME)
# Classify a test-vector file name as "mulAdd", "mul" or "add".
# "mulAdd" is tested first because such names also contain "mul";
# any name matching neither is reported as "add".
sub getType {
    my ($tv) = @_;

    return "mulAdd" if $tv =~ /mulAdd/;
    return "mul" if $tv =~ /mul/;
    return "add";
}
|
||||
|
||||
# getRm(FILENAME)
# Map the rounding-mode tag found in a test-vector file name to its numeric
# code: rz => 0, rne => 1, rd => 2, ru => 3. Tags are tried in that order and
# the first match wins; the string "bad" is returned when none matches.
sub getRm {
    my ($tv) = @_;

    my @modes = ([qr/rz/, 0], [qr/rne/, 1], [qr/rd/, 2], [qr/ru/, 3]);
    foreach my $mode (@modes) {
        my ($pattern, $code) = @$mode;
        return $code if $tv =~ $pattern;
    }
    return "bad";
}
|
||||
|
||||
# isdenorm(HEX)
# Return 1 when the half-precision value given as a hex string is a
# denormalized number (exponent field zero, fraction field nonzero),
# otherwise 0. Both +0 (0000) and -0 (8000) have a zero fraction and are
# therefore not denorms; testing the whole word for nonzero would wrongly
# flag -0 because of its sign bit.
sub isdenorm {
    my $fp = shift;
    my $val = hex($fp);
    my $expv = ($val >> 10) & 0x1F; # 5 exponent bits
    my $frac = $val & 0x3FF;        # 10 fraction bits
    my $denorm = 0;
    if ($expv == 0 && $frac != 0) { $denorm = 1; }
    return $denorm;
}
|
||||
62
examples/verilog/fma/wave.do
Normal file
62
examples/verilog/fma/wave.do
Normal file
@ -0,0 +1,62 @@
|
||||
# Wave-window setup for the fma16 testbench: adds the testbench and DUT
# signals in a fixed order, then restores cursors, layout and zoom.
onerror {resume}
quietly WaveActivateNextPane {} 0

# Testbench-level signals.
foreach sig {clk reset x y z result rexpected} {
    add wave -noupdate /testbench_fma16/$sig
}

# DUT ports and internal signals shown in the default radix.
foreach sig {
    x y z mul add negr negz roundmode result
    XManE YManE ZManE XExpE YExpE ZExpE PExpE
    Ne upOneExt XSgnE YSgnE ZSgnE PSgnE
    ProdManE NfracS ProdManAl ZManExt ZManAl Nfrac res
} {
    add wave -noupdate /testbench_fma16/dut/$sig
}

# AlignCnt is the one signal displayed in decimal.
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt

foreach sig {NSamt ZExpGreater ACLess upOne KillProd} {
    add wave -noupdate /testbench_fma16/dut/$sig
}

TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2

# Wave-window layout options, applied in the original order.
foreach {option value} {
    -namecolwidth 237
    -valuecolwidth 64
    -justifyvalue left
    -signalnamewidth 0
    -snapdistance 10
    -datasetprefix 0
    -rowmargin 4
    -childrowmargin 2
    -gridoffset 0
    -gridperiod 1
    -griddelta 40
    -timeline 0
    -timelineunits ns
} {
    configure wave $option $value
}
update
WaveRestoreZoom {4083 ns} {4235 ns}
|
||||
@ -55,20 +55,22 @@
|
||||
`define Q_NE 32'd15
|
||||
`define Q_NF 32'd112
|
||||
`define Q_BIAS 32'd16383
|
||||
`define Q_FMT 2'd3
|
||||
`define D_LEN 32'd64
|
||||
`define D_NE 32'd11
|
||||
`define D_NF 32'd52
|
||||
`define D_BIAS 32'd1023
|
||||
`define D_FMT 32'd1
|
||||
`define D_FMT 2'd1
|
||||
`define S_LEN 32'd32
|
||||
`define S_NE 32'd8
|
||||
`define S_NF 32'd23
|
||||
`define S_BIAS 32'd127
|
||||
`define S_FMT 32'd1
|
||||
`define S_FMT 2'd0
|
||||
`define H_LEN 32'd16
|
||||
`define H_NE 32'd5
|
||||
`define H_NF 32'd10
|
||||
`define H_BIAS 32'd15
|
||||
`define H_FMT 2'd2
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
|
||||
@ -91,6 +93,12 @@
|
||||
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
/* verilator lint_off STMTDLY */
|
||||
|
||||
@ -1 +0,0 @@
|
||||
vsim -c -do wally-coremark.do
|
||||
@ -9,4 +9,4 @@
|
||||
# sqrt - test square root
|
||||
# all - test everything
|
||||
|
||||
vsim -do "do testfloat.do rv64fpquad cmp"
|
||||
vsim -do "do testfloat.do rv64fp mul"
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
# wally-coremark.do
|
||||
#
|
||||
# Modification by Oklahoma State University & Harvey Mudd College
|
||||
# Use with Testbench
|
||||
# James Stine, 2008; David Harris 2021
|
||||
# Go Cowboys!!!!!!
|
||||
#
|
||||
# Takes 1:10 to run RV64IC tests using gui
|
||||
|
||||
# Use this wally-coremark.do file to run this example.
|
||||
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
|
||||
# do wally-coremark.do
|
||||
# or, to run from a shell, type the following at the shell prompt:
|
||||
# vsim -do wally-coremark.do -c
|
||||
# (omit the "-c" to see the GUI while running from the shell)
|
||||
|
||||
onbreak {resume}

# Start from a clean work library: delete any existing one, then recreate it.
if {[file exists work]} {
    vdel -all
}
vlib work

# Compile the coremark testbench and the design sources.
# -suppress 2583 silences the spurious warnings about
# "Extra checking for conflicts with always_comb done at vopt time",
# which are redundant because vsim will run vopt.
# This script compiles against config/rv64gc; the coremark_bare configuration
# is kept below as a commented-out alternative.
#vlog +incdir+../config/coremark_bare +incdir+../config/shared ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
vlog +incdir+../config/rv64gc +incdir+../config/shared \
    ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv \
    ../src/*/*.sv ../src/*/*/*.sv -suppress 2583

# Optimize and load the design.
# Remove the +acc flag for faster simulation during regressions if there is
# no need to access internal signals.
vopt +acc work.testbench -o workopt
vsim workopt

# Zero-fill RAM addresses 268435456..268566527 (0x10000000..0x1001FFFF).
mem load -startaddress 268435456 -endaddress 268566527 -filltype value -fillradix hex -filldata 0 /testbench/dut/uncore/ram/ram/RAM

#add log -recursive /*
do wave.do
run -all
#run 21400
#quit
|
||||
@ -1,502 +0,0 @@
|
||||
onerror {resume}
|
||||
quietly WaveActivateNextPane {} 0
|
||||
add wave -noupdate /testbench/clk
|
||||
add wave -noupdate /testbench/reset
|
||||
add wave -noupdate /testbench/test
|
||||
add wave -noupdate /testbench/memfilename
|
||||
add wave -noupdate /testbench/dut/core/SATP_REGW
|
||||
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
|
||||
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
|
||||
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/priv/trap/InstrValidM
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
|
||||
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/MemAdrM
|
||||
add wave -noupdate /testbench/dut/core/ieu/dp/ResultM
|
||||
add wave -noupdate /testbench/dut/core/ieu/dp/ResultW
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/IllegalInstrFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/BreakpointFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoMisalignedFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoAccessFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/EcallFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrPageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadPageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StorePageFaultM
|
||||
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InterruptM
|
||||
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/PendingIntsM
|
||||
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/CommittedM
|
||||
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/InstrValidM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/TrapM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/ICacheStallF
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
|
||||
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/MulDivStallD
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
|
||||
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
|
||||
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallF
|
||||
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallD
|
||||
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallE
|
||||
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallM
|
||||
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallW
|
||||
add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]}
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPInstrClassE[0]}
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredDirWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} -divider {class check}
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightNonCFI
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongCFI
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongNonCFI
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPRight
|
||||
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPWrong
|
||||
add wave -noupdate -group Bpred -radix hexadecimal -childformat {{{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} -radix binary}} -subitemconfig {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} {-height 16 -radix binary}} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNext
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRUpdateEN
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr0
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr1
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateEN
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRLookup
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/RA1
|
||||
add wave -noupdate -group Bpred -expand -group prediction -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBValidF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BPInstrClassF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBPredPCF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/LookUpPCIndex
|
||||
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/TargetPC
|
||||
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredE
|
||||
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
|
||||
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/BPPredDirWrongE
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePCIndex
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateEN
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePC
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCE
|
||||
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/WA1
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/TargetWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/FallThroughWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionPCWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/InstrClassE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionInstrClassWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredClassNonCFIWrongE
|
||||
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
|
||||
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/bus/icache/FinalInstrRawF
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrE
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrM
|
||||
add wave -noupdate -group {instruction pipeline} /testbench/InstrW
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredPCF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext0F
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext1F
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/SelBPPredF
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredWrongE
|
||||
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PrivilegedChangePCM
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/InstrD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/InstrDName
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
|
||||
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
|
||||
add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a3
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd1
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd2
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/we3
|
||||
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/wd3
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ReadDataW
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/CSRReadValW
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultSrcW
|
||||
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/result
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/FlagsE
|
||||
add wave -noupdate -group alu -divider internals
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/overflow
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/carry
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/zero
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/neg
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/lt
|
||||
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ltu
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1E
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2E
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdE
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdM
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdW
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/MemReadE
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteM
|
||||
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteW
|
||||
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardAE
|
||||
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardBE
|
||||
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/LoadStallD
|
||||
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/WriteDataE
|
||||
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/ALUResultE
|
||||
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcAE
|
||||
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcBE
|
||||
add wave -noupdate -group PCS /testbench/dut/core/ifu/PCNextF
|
||||
add wave -noupdate -group PCS /testbench/dut/core/PCF
|
||||
add wave -noupdate -group PCS /testbench/dut/core/ifu/PCD
|
||||
add wave -noupdate -group PCS /testbench/dut/core/PCE
|
||||
add wave -noupdate -group PCS /testbench/dut/core/PCM
|
||||
add wave -noupdate -group PCS /testbench/PCW
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/Funct3E
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivE
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/W64E
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallM
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallW
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushM
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushW
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivResultW
|
||||
add wave -noupdate -group muldiv /testbench/dut/core/mdu/DivBusyE
|
||||
add wave -noupdate -group icache -color Gold /testbench/dut/core/ifu/bus/icache/controller/CurrState
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/HitWay
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/VictimWay
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/WriteEnable}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/SetValid}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/CacheTagMem/StoredData}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/ValidBits}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/controller/NextState
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/ITLBMissF
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ITLBWriteF
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
|
||||
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/hit
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spill
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/ICacheStallF
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntReset
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/PreCntEn
|
||||
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntEn
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrPAdrF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrInF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/FetchCountFlag
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/FetchCount
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrReadF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrAckF
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/ICacheMemWriteEnable
|
||||
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/ICacheBusWriteData
|
||||
add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/InstrReadF
|
||||
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/MemSizeM
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HCLK
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESETn
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRDATA
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HREADY
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESP
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDR
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWDATA
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITE
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZE
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HBURST
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HPROT
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HTRANS
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED
|
||||
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED
|
||||
add wave -noupdate -group lsu -expand -group {LSU ARB} /testbench/dut/core/lsu/arbiter/SelPTW
|
||||
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu.bus.dcache/dcachefsm/CurrState
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WalkerPageFaultM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WriteDataM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWriteEnableM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordWriteEnableM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWayWriteEnable
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordEnable
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWayWriteEnableM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SelAdrM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/ReadDataBlockM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/DCacheBusWriteData
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetValid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetDirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/CacheTagMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/DirtyBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ValidBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/DirtyBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/SetDirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteWordEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/CacheTagMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetValid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetDirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/CacheTagMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/DirtyBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ValidBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetValid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetDirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/CacheTagMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/DirtyBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ValidBits}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetValid
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearValid
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetDirty
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearDirty
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/HitWay}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Valid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Dirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ReadTag}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/HitWay}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Valid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Dirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/ReadTag}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/HitWay}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Valid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Dirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ReadTag}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/HitWay}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Valid}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Dirty}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ReadTag}
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/HitWay
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataBlockWayMaskedM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordMuxM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimTag
|
||||
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimWay
|
||||
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirtyWay
|
||||
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirty
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemRWM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemAdrE
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemPAdrM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct3M
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct7M
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/AtomicM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/FlushDCacheM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/CacheableM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/WriteDataM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/ReadDataM
|
||||
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/DCacheStallM
|
||||
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/FlushAdrFlag
|
||||
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/HitWay
|
||||
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu.bus.dcache/CacheHit
|
||||
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/FetchCount
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/FetchCountFlag
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBPAdr
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBRead
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBWrite
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBAck
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HRDATA
|
||||
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HWDATA
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/EffectivePrivilegeMode
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/Translate
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/DisableTranslation
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBMiss
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBHit
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/PhysicalAddress
|
||||
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/TLBPageFault
|
||||
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/LoadAccessFaultM
|
||||
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/StoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBPAdr
|
||||
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/PTE
|
||||
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBWrite
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PhysicalAddress
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/SelRegions
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Cacheable
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Idempotent
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/AtomicAllowed
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PMAAccessFault
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAInstrAccessFaultF
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMALoadAccessFaultM
|
||||
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAStoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPInstrAccessFaultF
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPLoadAccessFaultM
|
||||
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPStoreAmoAccessFaultM
|
||||
add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/genblk1/WalkerState
|
||||
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PCF
|
||||
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/genblk1/TranslationVAdr
|
||||
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/TranslationPAdr
|
||||
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/HPTWReadPTE
|
||||
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PTE
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBMissF
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBMissM
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBWriteF
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBWriteM
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerInstrPageFaultF
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerLoadPageFaultM
|
||||
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerStorePageFaultM
|
||||
add wave -noupdate -group csr /testbench/dut/core/priv/csr/MIP_REGW
|
||||
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/TLBWrite
|
||||
add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF
|
||||
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/PhysicalAddress
|
||||
add wave -noupdate /testbench/dut/core/lsu.bus.dcache/VAdr
|
||||
add wave -noupdate /testbench/dut/core/lsu.bus.dcache/MemPAdrM
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
|
||||
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/ExtIntM
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
|
||||
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
|
||||
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/CTSb
|
||||
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/RIb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/SOUT
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RTSb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/DTRb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT1b
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT2b
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb
|
||||
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb
|
||||
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK
|
||||
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART
|
||||
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR
|
||||
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE
|
||||
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA
|
||||
add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/CYCLE_REGW
|
||||
add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/INSTRET_REGW
|
||||
add wave -noupdate -label LoadStall -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[3]}
|
||||
add wave -noupdate -label {Branch Instr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[5]}
|
||||
add wave -noupdate -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[4]}
|
||||
add wave -noupdate -label {Jump, Jal, Jalr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[7]}
|
||||
add wave -noupdate -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[8]}
|
||||
add wave -noupdate -label {BTB Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[6]}
|
||||
add wave -noupdate -label {BP Class Non CFI Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[10]}
|
||||
add wave -noupdate -label DCacheAccess -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[11]}
|
||||
add wave -noupdate -label DCacheMiss -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[12]}
|
||||
add wave -noupdate -label Return -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[9]}
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/MCOUNTINHIBIT_REGW
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/InstrValidM
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/InstrValidNotFlushedM
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/BPPredDirWrongM
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/genblk1/LoadStallM
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/NextHPMCOUNTERM
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheMiss
|
||||
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheAccess
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 6} {17923831 ns} 0}
|
||||
quietly wave cursor active 1
|
||||
configure wave -namecolwidth 250
|
||||
configure wave -valuecolwidth 297
|
||||
configure wave -justifyvalue left
|
||||
configure wave -signalnamewidth 1
|
||||
configure wave -snapdistance 10
|
||||
configure wave -datasetprefix 0
|
||||
configure wave -rowmargin 4
|
||||
configure wave -childrowmargin 2
|
||||
configure wave -gridoffset 0
|
||||
configure wave -gridperiod 1
|
||||
configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ns
|
||||
update
|
||||
WaveRestoreZoom {0 ns} {18715695 ns}
|
||||
@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
|
||||
add wave -hex /testbench/dut/core/ebu/HRDATA
|
||||
add wave -hex /testbench/dut/core/ebu/HWRITE
|
||||
add wave -hex /testbench/dut/core/ebu/HWDATA
|
||||
add wave -hex /testbench/dut/core/ebu/HBURST
|
||||
add wave -hex /testbench/dut/core/ebu/CaptureDataM
|
||||
add wave -divider
|
||||
|
||||
|
||||
@ -1,102 +1,9 @@
|
||||
|
||||
add wave -noupdate /testbenchfp/clk
|
||||
add wave -noupdate -radix decimal /testbenchfp/VectorNum
|
||||
add wave -group Other -noupdate /testbenchfp/FrmNum
|
||||
add wave -group Other -noupdate /testbenchfp/X
|
||||
add wave -group Other -noupdate /testbenchfp/Y
|
||||
add wave -group Other -noupdate /testbenchfp/Z
|
||||
add wave -group Other -noupdate /testbenchfp/Res
|
||||
add wave -group Other -noupdate /testbenchfp/Ans
|
||||
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneX
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneY
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneZ
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneRes
|
||||
add wave -group Rne -noupdate /testbenchfp/FmaRneAns
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzX
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzY
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzZ
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzRes
|
||||
add wave -group Rz -noupdate /testbenchfp/FmaRzAns
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuX
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuY
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuZ
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuRes
|
||||
add wave -group Ru -noupdate /testbenchfp/FmaRuAns
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdX
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdY
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdZ
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdRes
|
||||
add wave -group Rd -noupdate /testbenchfp/FmaRdAns
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
|
||||
add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
|
||||
add wave -group AllSignals -noupdate /*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
|
||||
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
|
||||
add wave -noupdate /testbenchfp/FrmNum
|
||||
add wave -noupdate /testbenchfp/X
|
||||
add wave -noupdate /testbenchfp/Y
|
||||
add wave -noupdate /testbenchfp/Z
|
||||
add wave -noupdate /testbenchfp/Res
|
||||
add wave -noupdate /testbenchfp/Ans
|
||||
|
||||
@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
|
||||
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
|
||||
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0
|
||||
|
||||
@ -45,6 +45,10 @@ module ahblite (
|
||||
input logic IFUBusRead,
|
||||
output logic [`XLEN-1:0] IFUBusHRDATA,
|
||||
output logic IFUBusAck,
|
||||
output logic IFUBusInit,
|
||||
input logic [2:0] IFUBurstType,
|
||||
input logic [1:0] IFUTransType,
|
||||
input logic IFUTransComplete,
|
||||
// Signals from Data Cache
|
||||
input logic [`PA_BITS-1:0] LSUBusAdr,
|
||||
input logic LSUBusRead,
|
||||
@ -52,7 +56,11 @@ module ahblite (
|
||||
input logic [`XLEN-1:0] LSUBusHWDATA,
|
||||
output logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
input logic [2:0] LSUBusSize,
|
||||
input logic [2:0] LSUBurstType,
|
||||
input logic [1:0] LSUTransType,
|
||||
input logic LSUTransComplete,
|
||||
output logic LSUBusAck,
|
||||
output logic LSUBusInit,
|
||||
// AHB-Lite external signals
|
||||
(* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
|
||||
(* mark_debug = "true" *) input logic HREADY, HRESP,
|
||||
@ -87,6 +95,9 @@ module ahblite (
|
||||
// Data accesses have priority over instructions. However, if a data access comes
|
||||
// while an instruction read is occurring, the instruction read finishes before
|
||||
// the data access can take place.
|
||||
// *** This is no longer true when adding burst mode. We need to finish the current
|
||||
// read before doing another read. Need to work this out, but preliminarily we can
|
||||
// store the current read type in a flop and use that to figure out what burst type to use.
|
||||
|
||||
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);
|
||||
|
||||
@ -100,19 +111,21 @@ module ahblite (
|
||||
// interface that might be used in place of the ahblite.
|
||||
always_comb
|
||||
case (BusState)
|
||||
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
|
||||
else if (LSUBusWrite)NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMREAD: if (~HREADY) NextBusState = MEMREAD;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMWRITE: if (~HREADY) NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
INSTRREAD: if (~HREADY) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE; // if (IFUBusRead still high) *** need to wait?
|
||||
default: NextBusState = IDLE;
|
||||
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
|
||||
else if (LSUBusWrite) NextBusState = MEMWRITE;
|
||||
else if (IFUBusRead) NextBusState = INSTRREAD;
|
||||
else NextBusState = IDLE;
|
||||
MEMREAD: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
|
||||
else if (LSUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = MEMREAD;
|
||||
MEMWRITE: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
|
||||
else if (LSUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = MEMWRITE;
|
||||
INSTRREAD: if (IFUTransComplete & LSUBusRead) NextBusState = MEMREAD;
|
||||
else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
|
||||
else if (IFUTransComplete) NextBusState = IDLE;
|
||||
else NextBusState = INSTRREAD;
|
||||
default: NextBusState = IDLE;
|
||||
endcase
|
||||
|
||||
|
||||
@ -122,7 +135,7 @@ module ahblite (
|
||||
assign #1 HADDR = AccessAddress;
|
||||
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
|
||||
assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
|
||||
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
|
||||
assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
|
||||
|
||||
/* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
|
||||
000: Single (SINGLE)
|
||||
@ -133,15 +146,16 @@ module ahblite (
|
||||
101: 8-beat incrementing burst (INCR8)
|
||||
110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
|
||||
111: 16-beat incrementing burst (INCR16)
|
||||
*/
|
||||
*** Remove if not necessary
|
||||
*/
|
||||
|
||||
|
||||
assign HPROT = 4'b0011; // not used; see Section 3.7
|
||||
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
|
||||
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
|
||||
assign HMASTLOCK = 0; // no locking supported
|
||||
assign HWRITE = NextBusState == MEMWRITE;
|
||||
assign HWRITE = (NextBusState == MEMWRITE);
|
||||
// delay write data by one cycle for
|
||||
flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
|
||||
flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
|
||||
// delay signals for subword writes
|
||||
flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD);
|
||||
flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
|
||||
@ -153,7 +167,9 @@ module ahblite (
|
||||
|
||||
assign IFUBusHRDATA = HRDATA;
|
||||
assign LSUBusHRDATA = HRDATA;
|
||||
assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
|
||||
assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
|
||||
assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
|
||||
assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
|
||||
assign IFUBusAck = HREADY & (BusState == INSTRREAD);
|
||||
assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));
|
||||
|
||||
endmodule
|
||||
|
||||
69
pipelined/src/fpu/cvtshiftcalc.sv
Normal file
69
pipelined/src/fpu/cvtshiftcalc.sv
Normal file
@ -0,0 +1,69 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// cvtshiftcalc: builds the value fed to the conversion shifter (CvtShiftIn) and
// flags underflow for conversions that produce a floating-point result (CvtResUf).
module cvtshiftcalc(
|
||||
input logic XZeroM, // when set, CvtResUf is forced low
|
||||
input logic ToInt, // selects the fp -> int form of the shifter input
|
||||
input logic IntToFp, // when set, CvtResUf is forced low
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated exponent
|
||||
input logic [`NF:0] XManM, // input mantissas
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic CvtResDenormUfM, // selects the right-shift (denormal/underflow) form of the shifter input
|
||||
output logic CvtResUf, // result underflows (only for conversions producing fp)
|
||||
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
|
||||
);
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// select the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | Mantissa | 0's if necessary |
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
|
||||
// - we do however want to keep the one in the sticky bit so set one of the bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary |
|
||||
// - otherwise:
|
||||
// | LzcInM | 0's if necessary |
|
||||
// In the fp -> int form, a negative CvtCalcExpM (top bit set) clears the leading
|
||||
// mantissa bit and ORs it into the next bit down instead.
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{CvtLzcInM, {`NF+1{1'b0}}};
|
||||
|
||||
|
||||
// choose the negative of the fraction size
|
||||
// (one generate branch per supported number of fp formats; OutFmt picks the format)
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
// NOTE(review): 1'bx is a 1-bit literal; assigning it to the wider ResNegNF
// presumably zero-extends, leaving only bit 0 unknown - confirm whether an
// all-x value ('x) was intended.
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - an integer to fp conversion can't underflow
|
||||
// ResNegNF is padded with leading 1s up to the width of CvtCalcExpM so that the
|
||||
// signed comparison treats it as a negative number.
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
|
||||
|
||||
endmodule
|
||||
@ -2,13 +2,12 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// FOpCtrlE values
|
||||
// 111 min
|
||||
// 110 min
|
||||
// 101 max
|
||||
// 010 equal
|
||||
// 001 less than
|
||||
// 011 less than or equal
|
||||
|
||||
|
||||
module fcmp (
|
||||
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // see above table
|
||||
@ -20,12 +19,13 @@ module fcmp (
|
||||
input logic XSNaNE, YSNaNE, // is signaling NaN
|
||||
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
|
||||
output logic CmpNVE, // invalid flag
|
||||
output logic [`FLEN-1:0] CmpResE // compare resilt
|
||||
output logic [`FLEN-1:0] CmpFpResE, // compare result
|
||||
output logic [`XLEN-1:0] CmpIntResE // compare result
|
||||
);
|
||||
|
||||
logic LTabs, LT, EQ; // is X < or > or = Y
|
||||
logic [`FLEN-1:0] NaNRes;
|
||||
logic BothZeroE, EitherNaNE, EitherSNaNE;
|
||||
logic BothZero, EitherNaN, EitherSNaN;
|
||||
|
||||
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
|
||||
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
|
||||
@ -36,9 +36,9 @@ module fcmp (
|
||||
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
|
||||
assign EQ = (FSrcXE == FSrcYE);
|
||||
|
||||
assign BothZeroE = XZeroE&YZeroE;
|
||||
assign EitherNaNE = XNaNE|YNaNE;
|
||||
assign EitherSNaNE = XSNaNE|YSNaNE;
|
||||
assign BothZero = XZeroE&YZeroE;
|
||||
assign EitherNaN = XNaNE|YNaNE;
|
||||
assign EitherSNaN = XSNaNE|YSNaNE;
|
||||
|
||||
|
||||
// flags
|
||||
@ -47,12 +47,12 @@ module fcmp (
|
||||
// EQ - quiet - sets invalid if signaling NaN input
|
||||
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpNVE = EitherSNaNE;//min
|
||||
3'b101: CmpNVE = EitherSNaNE;//max
|
||||
3'b010: CmpNVE = EitherSNaNE;//equal
|
||||
3'b001: CmpNVE = EitherNaNE;//less than
|
||||
3'b011: CmpNVE = EitherNaNE;//less than or equal
|
||||
default: CmpNVE = 1'b0;
|
||||
3'b110: CmpNVE = EitherSNaN;//min
|
||||
3'b101: CmpNVE = EitherSNaN;//max
|
||||
3'b010: CmpNVE = EitherSNaN;//equal
|
||||
3'b001: CmpNVE = EitherNaN;//less than
|
||||
3'b011: CmpNVE = EitherNaN;//less than or equal
|
||||
default: CmpNVE = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
@ -91,7 +91,7 @@ module fcmp (
|
||||
`FMT2:
|
||||
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
|
||||
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
default: NaNRes = (`FLEN)'(0);
|
||||
default: NaNRes = {`FLEN{1'bx}};
|
||||
endcase
|
||||
|
||||
else if (`FPSIZES == 4)
|
||||
@ -112,16 +112,12 @@ module fcmp (
|
||||
endcase
|
||||
|
||||
// when one input is a NaN -output the non-NaN
|
||||
always_comb
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
|
||||
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
|
||||
3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
|
||||
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
|
||||
3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
|
||||
3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
|
||||
3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
|
||||
default: CmpResE = (`FLEN)'(0);
|
||||
endcase
|
||||
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
|
||||
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
|
||||
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
|
||||
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
|
||||
|
||||
|
||||
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
|
||||
|
||||
endmodule
|
||||
|
||||
@ -10,99 +10,99 @@ module fctrl (
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic FRegWriteD, // FP register write enable
|
||||
output logic FDivStartD, // Start division or squareroot
|
||||
output logic [1:0] FResultSelD, // select result to be written to fp register
|
||||
output logic [1:0] FResSelD, // select result to be written to fp register
|
||||
output logic [2:0] FOpCtrlD, // chooses which operation to do - specifics shown at bottom of module and in each unit
|
||||
output logic [1:0] FResSelD, // select one of the results done in the memory stage
|
||||
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
|
||||
output logic [1:0] PostProcSelD,
|
||||
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
|
||||
output logic [2:0] FrmD, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
output logic FWriteIntD // is the result written to the integer register
|
||||
);
|
||||
|
||||
`define FCTRLW 13
|
||||
`define FCTRLW 11
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
//*** will putting x for don't cares reduce area in synthesis???
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
|
||||
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
|
||||
ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1;
|
||||
else case(OpD)
|
||||
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
|
||||
7'b1010011: casez(Funct7D)
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_xx_010_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b00101??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_xx_110_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_xx_101_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b10100??: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_00_xx_010_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_00_xx_001_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx__0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])//***reduce resSel
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
|
||||
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_xx_000_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.w to int reg
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.d to int reg
|
||||
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
7'b1101000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w w->s
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l l->s
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
|
||||
endcase
|
||||
7'b1100000: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s s->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s s->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s s->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s s->lu
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.w.x to fp reg
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
|
||||
7'b1101001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
|
||||
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w w->d
|
||||
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
|
||||
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l l->d
|
||||
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
|
||||
endcase
|
||||
7'b1100001: case(Rs2D[1:0])
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
|
||||
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d d->w
|
||||
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d d->wu
|
||||
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d d->l
|
||||
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d d->lu
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.d.x to fp reg
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
|
||||
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
|
||||
endcase
|
||||
|
||||
// unswizzle control bits
|
||||
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// rounding modes:
|
||||
// 000 - round to nearest, ties to even
|
||||
@ -121,82 +121,61 @@ module fctrl (
|
||||
assign FmtD = 0;
|
||||
else if (`FPSIZES == 2)begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtD = (`FMT == FmtTmp);
|
||||
end
|
||||
else if (`FPSIZES == 3|`FPSIZES == 4)
|
||||
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
|
||||
|
||||
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
|
||||
// FResultSel:
|
||||
// 000 - ReadRes - load
|
||||
// 001 - FMARes - FMA and multiply
|
||||
// 010 - FAddRes - add and fp to fp
|
||||
// 011 - FDivRes - divide and squareroot
|
||||
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
|
||||
// FResSel:
|
||||
// 00 - SrcA - move to fp register
|
||||
// 01 - SgnRes - sign injection
|
||||
// 10 - CmpRes - min/max
|
||||
// 11 - CvtRes - convert to fp
|
||||
|
||||
// FIntResSel:
|
||||
// 00 - CmpRes - less than, equal, or less than or equal
|
||||
// 01 - FSrcX - move to int register
|
||||
// 10 - ClassRes - classify
|
||||
// 11 - CvtRes - convert to signed/unsigned int
|
||||
// Final Res Sel:
|
||||
// fp int
|
||||
// 00 other cmp
|
||||
// 01 postproc cvt
|
||||
// 10 store class
|
||||
// 11 mv
|
||||
|
||||
// OpCtrl values:
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
// post processing Sel:
|
||||
// 00 cvt
|
||||
// 01 div
|
||||
// 10 fma
|
||||
|
||||
// cmp
|
||||
// fmin = ?111
|
||||
// fmax = ?101
|
||||
// feq = ?010
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
// Other Sel:
|
||||
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
|
||||
// 000 - sign 00
|
||||
// 001 - negate sign 00
|
||||
// 010 - xor sign 00
|
||||
// 011 - mv to fp 01
|
||||
// 110 - min 10
|
||||
// 101 - max 10
|
||||
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, negate product, negate addend}
|
||||
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
// fsgnjn = ??01
|
||||
// fsgnjx = ??10
|
||||
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// fcvt.s.d = 0111
|
||||
// fcvt.d.s = 0111
|
||||
// Fmt controls the output for fp -> fp
|
||||
|
||||
// convert
|
||||
// fcvt.w.s = 0010
|
||||
// fcvt.wu.s = 0110
|
||||
// fcvt.s.w = 0001
|
||||
// fcvt.s.wu = 0101
|
||||
// fcvt.l.s = 1010
|
||||
// fcvt.lu.s = 1110
|
||||
// fcvt.s.l = 1001
|
||||
// fcvt.s.lu = 1101
|
||||
// fcvt.w.d = 0010
|
||||
// fcvt.wu.d = 0110
|
||||
// fcvt.d.w = 0001
|
||||
// fcvt.d.wu = 0101
|
||||
// fcvt.l.d = 1010
|
||||
// fcvt.lu.d = 1110
|
||||
// fcvt.d.l = 1001
|
||||
// fcvt.d.lu = 1101
|
||||
// {long, unsigned, to int, from int}
|
||||
// OpCtrl:
|
||||
// Fma: {not multiply-add?, negate prod?, negate Z?}
|
||||
// 000 - fmadd
|
||||
// 001 - fmsub
|
||||
// 010 - fnmsub
|
||||
// 011 - fnmadd
|
||||
// 100 - mul
|
||||
// 110 - add
|
||||
// 111 - sub
|
||||
// Div:
|
||||
// 0 - ???
|
||||
// 1 - ???
|
||||
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
|
||||
// Cvt Fp: output format
|
||||
// 10 - to half
|
||||
// 00 - to single
|
||||
// 01 - to double
|
||||
// 11 - to quad
|
||||
// Cmp: {equal?, less than?}
|
||||
// 010 - eq
|
||||
// 001 - lt
|
||||
// 011 - le
|
||||
// 110 - min
|
||||
// 101 - max
|
||||
// Sgn:
|
||||
// 00 - sign
|
||||
// 01 - negate sign
|
||||
// 10 - xor sign
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
@ -1,8 +1,5 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
|
||||
module fcvt (
|
||||
input logic XSgnE, // input's sign
|
||||
@ -13,14 +10,13 @@ module fcvt (
|
||||
input logic FWriteIntE, // is fp->int (since it's writing to the integer register)
|
||||
input logic XZeroE, // is the input zero
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic XInfE, // is the input infinity
|
||||
input logic XNaNE, // is the input a NaN
|
||||
input logic XSNaNE, // is the input a signaling NaN
|
||||
input logic [2:0] FrmE, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
|
||||
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
|
||||
output logic [4:0] CvtFlgE // the conversion's flags
|
||||
output logic [`NE:0] CvtCalcExpE, // the calculated exponent
|
||||
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
|
||||
output logic CvtResDenormUfE,// does the result underflow or is denormalized
|
||||
output logic CvtResSgnE, // the result's sign
|
||||
output logic IntZeroE, // is the integer zero?
|
||||
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
@ -41,34 +37,8 @@ module fcvt (
|
||||
logic [`FMTBITS-1:0] OutFmt; // format of the output
|
||||
logic [`XLEN-1:0] PosInt; // the positive integer input
|
||||
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
|
||||
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`NE:0] CalcExp; // the calculated exponent
|
||||
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
|
||||
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
|
||||
logic ResDenormUf;// does the result underflow or is denormalized
|
||||
logic ResUf; // does the result underflow
|
||||
logic [`LGLEN+`NF:0] Shifted; // the shifted result
|
||||
logic [`NE-2:0] NewBias; // the bias of the final result
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
logic [`NE-1:0] OldExp; // the old exponent
|
||||
logic ResSgn; // the result's sign
|
||||
logic Sticky; // sticky bit - for rounding
|
||||
logic Round; // round bit - for rounding
|
||||
logic LSBFrac; // the least significant bit of the fraction - for rounding
|
||||
logic CalcPlus1; // the calculated plus 1
|
||||
logic Plus1; // add one to the final result?
|
||||
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
|
||||
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
|
||||
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
|
||||
logic [`NF-1:0] ResFrac; // the result's fraction
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic Overflow, Underflow, Inexact, Invalid; // flags
|
||||
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
|
||||
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
|
||||
logic [1:0] NegResMSBS; // the negative integer result's most significant bits
|
||||
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
|
||||
logic KillRes; // kill the result?
|
||||
logic Signed; // is the operation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the operation an int->fp conversion?
|
||||
@ -97,8 +67,9 @@ module fcvt (
|
||||
// 1) negate the input if the input is a negative signed integer
|
||||
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
|
||||
|
||||
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
|
||||
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
|
||||
assign IntZeroE = ~|TrimInt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// lzc
|
||||
@ -107,32 +78,16 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// select the input to the shifter
|
||||
// fp -> int:
|
||||
// | `XLEN zeros | Mantissa | 0's if necessary |
|
||||
// Other problems:
|
||||
// - if shifting to the right (neg CalcExp) then don't put a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
|
||||
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
|
||||
// - ex: for the case 0010000.... (double)
|
||||
// ??? -> fp:
|
||||
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary |
|
||||
// - otherwise:
|
||||
// | lzcIn | 0's if necessary |
|
||||
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
|
||||
{LzcIn, {`NF+1{1'b0}}};
|
||||
// kill the shift if it's negative
|
||||
// kill the shift if it's negative
|
||||
// select the amount to shift by
|
||||
// fp -> int:
|
||||
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
|
||||
@ -144,47 +99,10 @@ module fcvt (
|
||||
// - only shift fp -> fp if the initial value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
|
||||
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
|
||||
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
|
||||
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
|
||||
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
|
||||
|
||||
// shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if nessisary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if nessisary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -215,7 +133,7 @@ module fcvt (
|
||||
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
|
||||
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
|
||||
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
|
||||
default: NewBiasToFp = 1'bx;
|
||||
default: NewBiasToFp = {`NE-1{1'bx}};
|
||||
endcase
|
||||
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
|
||||
|
||||
@ -262,40 +180,11 @@ module fcvt (
|
||||
// - shift left to normalize (-1-ZeroCnt)
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
// find if the result is denormal or underflows
|
||||
// - if the calculated exponent is 0 or negative (and the input/result is not exactly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
|
||||
// choose the negative of the fraction size
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
|
||||
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
|
||||
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
|
||||
default: ResNegNF = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
|
||||
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
|
||||
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
|
||||
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
|
||||
endcase
|
||||
end
|
||||
// determine if the result underflows ??? -> fp
|
||||
// - if the first 1 is shifted out of the result then the result underflows
|
||||
// - can't underflow an integer to fp conversions
|
||||
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
|
||||
assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@ -307,498 +196,7 @@ module fcvt (
|
||||
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
|
||||
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
|
||||
// - otherwise: the floating point input's sign
|
||||
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// rounding
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
endmodule
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// 11 - Plus1
|
||||
|
||||
// round to zero - do nothing
|
||||
|
||||
// round to -infinity - Plus1 if negative
|
||||
|
||||
// round to infinity - Plus1 if positive
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 1x - Plus1
|
||||
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
|
||||
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
|
||||
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
|
||||
end
|
||||
`FMT1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
|
||||
end
|
||||
`FMT2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
|
||||
end
|
||||
default: begin
|
||||
ToFpSticky = 1'bx;
|
||||
ToFpRound = 1'bx;
|
||||
ToFpLSBFrac = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
|
||||
end
|
||||
2'h1: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
|
||||
end
|
||||
2'h0: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
|
||||
end
|
||||
2'h2: begin
|
||||
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
|
||||
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
|
||||
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
|
||||
end
|
||||
endcase
|
||||
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
|
||||
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
|
||||
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
|
||||
end
|
||||
|
||||
always_comb
|
||||
// Determine if you add 1
|
||||
case (FrmE)
|
||||
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResSgn;//round down
|
||||
3'b011: CalcPlus1 = ~ResSgn;//round up
|
||||
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
// dont round if exact
|
||||
assign Plus1 = CalcPlus1&(Round|Sticky);
|
||||
|
||||
// shift the 1 to the proper position for rounding
|
||||
// - don't round if converting to integer
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
|
||||
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
|
||||
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
|
||||
default: ShiftedPlus1 = 0;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
|
||||
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
|
||||
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
|
||||
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
|
||||
endcase
|
||||
end
|
||||
// kill calcExp if the result is denormalized
|
||||
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
|
||||
// trim the result's exponent to size
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// flags
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate the flags
|
||||
|
||||
// find the maximum exponent (the exponent and larger overflows)
|
||||
if (`FPSIZES == 1) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
|
||||
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
MaxExpFp = {`NE{1'b1}};
|
||||
end
|
||||
`FMT1: begin
|
||||
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
|
||||
end
|
||||
`FMT2: begin
|
||||
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
|
||||
end
|
||||
default: begin
|
||||
MaxExpFp = 1'bx;
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic [`NE-1:0] MaxExpFp;
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
MaxExpFp = {`Q_NE{1'b1}};
|
||||
end
|
||||
2'h1: begin
|
||||
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
|
||||
end
|
||||
2'h0: begin
|
||||
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
|
||||
end
|
||||
2'h2: begin
|
||||
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
|
||||
end
|
||||
endcase
|
||||
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
|
||||
end
|
||||
|
||||
// if the result exponent is larger than the maximum possible exponent
|
||||
// | and the exponent is positive
|
||||
// | | and the input is not NaN or Infinity
|
||||
// | | |
|
||||
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
|
||||
|
||||
// if the result is denormalized or underflowed
|
||||
// | and the result did not round into normal values
|
||||
// | | and the result is not exact
|
||||
// | | | and the result isn't NaN
|
||||
// | | | |
|
||||
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
|
||||
|
||||
// we are using the IEEE convertToIntegerExact operations (rather than the plain convertToInteger ones), which do signal the inexact flag
|
||||
// if there were bits thrown away
|
||||
// | if overflowed or underflowed
|
||||
// | | and if not a NaN
|
||||
// | | |
|
||||
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
|
||||
|
||||
// if the result is too small to be represented and not 0
|
||||
// | and if the result is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// if an input was a signaling NaN (and we're using a FP input)
|
||||
// |
|
||||
assign FpInvalid = (XSNaNE&~IntToFp);
|
||||
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
|
||||
// if the input is NaN or infinity
|
||||
// | if the integer result overflows (out of range)
|
||||
// | | if the input was negative but outputting to an unsigned number
|
||||
// | | | the result doesn't round to zero
|
||||
// | | | | or the result rounds up out of bounds
|
||||
// | | | | and the result didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive result rounds up out of range
|
||||
// select the invalid flag to output
|
||||
assign Invalid = ToInt ? IntInvalid : FpInvalid;
|
||||
// pack the flags together
|
||||
// - fp -> int does not set the overflow or underflow flags
|
||||
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// result selection
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if you should kill the result
|
||||
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was not infinity (or the input is an integer) and the rounding mode is RZ, RD with a positive result, or RU with a negative result, then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
|
||||
end else begin
|
||||
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity singal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
|
||||
// result for when the result is killed i.e. underflowes
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
NaNRes = 1'bx;
|
||||
InfRes = 1'bx;
|
||||
UfRes = 1'bx;
|
||||
Res = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
|
||||
end else begin
|
||||
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
|
||||
|
||||
// result for when the result is killed, i.e. when it underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
|
||||
// result for when the result is killed, i.e. when it underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
|
||||
// result for when the result is killed, i.e. when it underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
|
||||
if(`IEEE754) begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
|
||||
end else begin
|
||||
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
|
||||
end
|
||||
// determine the infinity result
|
||||
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
|
||||
// - otherwise: output infinity with the correct sign
|
||||
// - kill the infinity signal if the input isn't fp
|
||||
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
// result for when the result is killed, i.e. when it underflows
|
||||
// - output a rounded 0 with the correct sign
|
||||
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
|
||||
|
||||
// format the result - NaN box single precision (put 1's in the unused msbs)
|
||||
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// choose the floating point result
|
||||
// - if the input is NaN (and using the NaN input) output the NaN result
|
||||
// - if the input is infinity or the output overflows
|
||||
// - kill the InfE signal if the input isn't a floating point value
|
||||
// - if killing the result output the underflow result
|
||||
// - otherwise output the normal result
|
||||
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
|
||||
(XInfE&~IntToFp)|Overflow ? InfRes :
|
||||
KillRes ? UfRes :
|
||||
Res;
|
||||
// *** probably can optimize the negation
|
||||
// select the overflow integer result
|
||||
// - negative infinity and out of range negative input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity, out of range positive input, and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsigned result should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
||||
// round and negate the positive result if needed
|
||||
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
|
||||
// - if the input underflows
|
||||
// - if rounding and signed operation and negative input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal result (trimmed and sign extended if necessary)
|
||||
assign CvtIntResE = Invalid ? OfIntRes :
|
||||
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
|
||||
endmodule
|
||||
@ -34,7 +34,7 @@ module fhazard(
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
|
||||
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
|
||||
input logic [4:0] RdM, RdW, // the adress being written to
|
||||
input logic [1:0] FResultSelM, // the result being selected
|
||||
input logic [1:0] FResSelM, // the result being selected
|
||||
output logic FStallD, // stall the decode stage
|
||||
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
|
||||
);
|
||||
@ -47,10 +47,12 @@ module fhazard(
|
||||
FForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
//*** this hazard unit is waiting for all three inputs, change so that if an input isn't used then don't wait
|
||||
|
||||
// if the needed value is in the memory stage - input 1
|
||||
if ((Adr1E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
|
||||
@ -59,7 +61,7 @@ module fhazard(
|
||||
// if the needed value is in the memory stage - input 2
|
||||
if ((Adr2E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
|
||||
@ -68,7 +70,7 @@ module fhazard(
|
||||
// if the needed value is in the memory stage - input 3
|
||||
if ((Adr3E == RdM) & FRegWriteM)
|
||||
// if the result will be FResM (can be taken from the memory stage)
|
||||
if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
|
||||
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // otherwise stall
|
||||
// if the needed value is in the writeback stage
|
||||
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
147
pipelined/src/fpu/flags.sv
Normal file
147
pipelined/src/fpu/flags.sv
Normal file
@ -0,0 +1,147 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module flags(
|
||||
input logic XSgnM,
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic Plus1,
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XZeroM, YZeroM, // inputs are zero
|
||||
input logic XNaNM, YNaNM, // inputs are NaN
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic Sqrt, // Sqrt?
|
||||
input logic ToInt, // convert to integer
|
||||
input logic IntToFp, // convert integer to floating point
|
||||
input logic Int64, // convert to 64 bit integer
|
||||
input logic Signed, // convert to a signed integer
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent - Cvt
|
||||
input logic CvtOp, // conversion opperation?
|
||||
input logic DivOp, // division (or square root) operation?
|
||||
input logic FmaOp, // Fma opperation?
|
||||
input logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] RoundExp, // exponent of the normalized sum
|
||||
input logic [1:0] NegResMSBS, // the negitive integer result's most significant bits
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
|
||||
output logic IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
|
||||
output logic [4:0] PostProcFlgM // flags
|
||||
);
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic Inexact; // inexact flag
|
||||
logic FpInexact; // floating point inexact flag
|
||||
logic IntInexact; // integer inexact flag
|
||||
logic FmaInvalid; // FMA invalid flag
|
||||
logic DivInvalid; // divide/sqrt invalid flag
|
||||
logic DivByZero;
|
||||
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
|
||||
logic ShiftGtIntSz; // is the shift greater than the integer size (use ResExp to account for possible rounding "shift")
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
|
||||
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
|
||||
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
|
||||
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
|
||||
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
|
||||
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
|
||||
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
|
||||
endcase
|
||||
// a left shift of intlen+1 is still in range but any more than that is an overflow
|
||||
// inital: | 64 0's | XLEN |
|
||||
// | 64 0's | XLEN | << 64
|
||||
// | XLEN | 00000... |
|
||||
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// larger or equal if:
|
||||
// - any of the bits after the most significant 1 is one
|
||||
// - the most significant one in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
|
||||
end
|
||||
|
||||
// if the result is greater than or equal to the max exponent(not taking into account sign)
|
||||
// | and the exponent isn't negative
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
|
||||
|
||||
// detecting tininess after rounding
|
||||
// the exponent is negative
|
||||
// | the result is denormalized
|
||||
// | | the result is normal and rounded from a denorm
|
||||
// | | | and if given an unbounded exponent the result does not round
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
|
||||
|
||||
// Set Inexact flag if the res is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't output
|
||||
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid;
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = ToInt ? IntInexact : FpInexact;
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
// if the input is NaN or infinity
|
||||
// | if the integer res overflows (out of range)
|
||||
// | | if the input was negative but outputting to an unsigned number
|
||||
// | | | the res doesn't round to zero
|
||||
// | | | | or the res rounds up out of bounds
|
||||
// | | | | and the res didn't underflow
|
||||
// | | | | |
|
||||
assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
|
||||
// |
|
||||
// or when the positive res rounds up out of range
|
||||
assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
|
||||
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
|
||||
|
||||
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
|
||||
|
||||
|
||||
assign DivByZero = YZeroM&DivOp;
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
assign PostProcFlgM = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
@ -30,73 +30,6 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fma(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic FlushM, // flush the memory stage
|
||||
input logic StallM, // stall memory stage
|
||||
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
|
||||
input logic XSgnM, YSgnM, // input signs - memory stage
|
||||
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
|
||||
input logic ZDenormE, // is denorm
|
||||
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
|
||||
input logic XNaNM, YNaNM, ZNaNM, // is NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
|
||||
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
|
||||
input logic XInfM, YInfM, ZInfM, // is infinity
|
||||
output logic [`FLEN-1:0] FMAResM, // FMA result
|
||||
output logic [4:0] FMAFlgM); // FMA flags
|
||||
|
||||
//fma/mult/add
|
||||
// fmadd = 000
|
||||
// fmsub = 001
|
||||
// fnmsub = 010 -(a*b)+c
|
||||
// fnmadd = 011 -(a*b)-c
|
||||
// fmul = 100
|
||||
// fadd = 110
|
||||
// fsub = 111
|
||||
|
||||
// signals transfered between pipeline stages
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
|
||||
logic Mult;
|
||||
logic ZDenormM;
|
||||
|
||||
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// E/M pipeline registers
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
|
||||
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
|
||||
|
||||
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
|
||||
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
|
||||
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
|
||||
.FMAResM, .FMAFlgM);
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
//*** in all units before putting into : ? put in a separate signal
|
||||
|
||||
module fma1(
|
||||
input logic XSgnE, YSgnE, ZSgnE, // input's signs
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
|
||||
@ -111,7 +44,7 @@ module fma1(
|
||||
output logic InvZE, // intert Z
|
||||
output logic ZSgnEffE, // the modified Z sign
|
||||
output logic PSgnE, // the product's sign
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
|
||||
);
|
||||
|
||||
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
|
||||
@ -151,7 +84,7 @@ module fma1(
|
||||
|
||||
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
|
||||
|
||||
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
|
||||
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);
|
||||
|
||||
// Choose the positive sum and accompanying LZA result.
|
||||
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
|
||||
@ -332,7 +265,7 @@ endmodule
|
||||
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
input logic [3*`NF+6:0] A, // addend
|
||||
input logic [2*`NF+1:0] P, // product
|
||||
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
|
||||
);
|
||||
|
||||
logic [3*`NF+6:0] T;
|
||||
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
|
||||
|
||||
|
||||
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
|
||||
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module fma2(
|
||||
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic Mult, // multiply opperation
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
|
||||
output logic [`FLEN-1:0] FMAResM, // FMA final result
|
||||
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
|
||||
|
||||
logic [`NF-1:0] ResultFrac; // Result fraction
|
||||
logic [`NE-1:0] ResultExp; // Result exponent
|
||||
logic ResultSgn, ResultSgnTmp; // Result sign
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [`NF+1:0] NormSum; // normalized sum
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky, UfSticky; // Sticky bit
|
||||
logic CalcPlus1; // do you add or subtract one for rounding
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Invalid,Underflow,Overflow; // flags
|
||||
logic Guard, Round; // bits needed to determine rounding
|
||||
logic UfLSBNormSum; // bits needed to determine rounding for underflow flag
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
|
||||
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
|
||||
.CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
|
||||
.FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
|
||||
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
|
||||
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
|
||||
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
|
||||
|
||||
// *** use NF where needed
|
||||
|
||||
endmodule
|
||||
|
||||
module resultsign(
|
||||
input logic [2:0] FrmM,
|
||||
input logic PSgnM, ZSgnEffM,
|
||||
input logic Underflow,
|
||||
input logic InvZM,
|
||||
input logic NegSumM,
|
||||
input logic SumZero,
|
||||
input logic Mult,
|
||||
output logic ResultSgnTmp,
|
||||
output logic ResultSgn
|
||||
);
|
||||
|
||||
logic ZeroSgn;
|
||||
// logic ResultSgnTmp;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// if multiply then Psgn
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
|
||||
|
||||
// is the result negitive
|
||||
// if p - z is the Sum negitive
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negitive
|
||||
assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module normalize(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic [`NE-1:0] ZExpM, // exponent of Z
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZDenormM,
|
||||
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
|
||||
output logic [`NF+1:0] NormSum, // normalized sum
|
||||
output logic SumZero, // is the sum zero
|
||||
output logic NormSumSticky, UfSticky, // sticky bits
|
||||
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
output logic ResultDenorm // is the result denormalized
|
||||
);
|
||||
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
|
||||
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
||||
// calculate the sum's exponent
|
||||
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
|
||||
|
||||
//convert the sum's exponent into the proper precision
|
||||
if (`FPSIZES == 1) begin
|
||||
assign SumExpTmp = SumExpTmpTmp;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: SumExpTmp = SumExpTmpTmp;
|
||||
`FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
|
||||
`FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
|
||||
default: SumExpTmp = `NE+2'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: SumExpTmp = SumExpTmpTmp;
|
||||
2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// determine if the result is denormalized
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
|
||||
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
|
||||
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
|
||||
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
default: PreResultDenorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
|
||||
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
|
||||
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
|
||||
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
|
||||
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
|
||||
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
|
||||
assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
|
||||
assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// 010. when should be 001.
|
||||
// - shift left one
|
||||
// - add one from exp
|
||||
// - if kill prod don't add to exp
|
||||
|
||||
// Determine if the result is denormal
|
||||
// assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// Normalize the sum
|
||||
assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
|
||||
// LZA correction
|
||||
assign LZAPlus1 = SumShifted[3*`NF+7];
|
||||
assign LZAPlus2 = SumShifted[3*`NF+8];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum, so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
|
||||
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
|
||||
|
||||
// Calculate the sticky bit
|
||||
if (`FPSIZES == 1) begin
|
||||
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// 3*NF+5 - NF1 - 3
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
|
||||
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
|
||||
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
|
||||
|
||||
end
|
||||
|
||||
assign UfSticky = AddendStickyM | NormSumSticky;
|
||||
|
||||
// Determine sum's exponent
|
||||
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
|
||||
assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
|
||||
// recalculate if the result is denormalized
|
||||
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
|
||||
|
||||
endmodule
|
||||
|
||||
module fmaround(
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic UfSticky, // sticky bit for underflow calculation
|
||||
input logic [`NF+1:0] NormSum, // normalized sum
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic NormSumSticky, // normalized sum's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
input logic InvZM, // invert Z
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ResultSgnTmp, // the result's sign
|
||||
output logic CalcPlus1, UfPlus1, // do you add or subtract one from the result
|
||||
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResultFrac, // Result fraction
|
||||
output logic [`NE-1:0] ResultExp, // Result exponent
|
||||
output logic Sticky, // sticky bit
|
||||
output logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
|
||||
);
|
||||
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic UfGuard; // guard bit used to calculate underflow
|
||||
logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract one from the result
|
||||
logic [`NF-1:0] NormSumTruncated; // the normalized sum trimmed to fit the mantissa
|
||||
logic UfRound;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Round = NormSum[1];
|
||||
assign LSBNormSum = NormSum[2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfRound = NormSum[0];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/-------------NF---------------,
|
||||
// | NF1 | 2 | |
|
||||
// '-------NF1------^
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
|
||||
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
|
||||
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[0];
|
||||
end
|
||||
`FMT1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`NF1+1];
|
||||
LSBNormSum = NormSum[`NF-`NF1+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`NF1];
|
||||
end
|
||||
`FMT2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`NF2+1];
|
||||
LSBNormSum = NormSum[`NF-`NF2+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`NF2];
|
||||
end
|
||||
default: begin
|
||||
Round = 1'bx;
|
||||
LSBNormSum = 1'bx;
|
||||
UfRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[1];
|
||||
LSBNormSum = NormSum[2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[0];
|
||||
end
|
||||
2'h1: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`D_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`D_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`D_NF];
|
||||
end
|
||||
2'h0: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`S_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`S_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`S_NF];
|
||||
end
|
||||
2'h2: begin
|
||||
// determine guard, round, and least significant bit of the result
|
||||
Round = NormSum[`NF-`H_NF+1];
|
||||
LSBNormSum = NormSum[`NF-`H_NF+2];
|
||||
// used to determine underflow flag
|
||||
UfRound = NormSum[`NF-`H_NF];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
// used to determine underflow flag
|
||||
assign UfLSBNormSum = Round;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Determine if a small number was supposed to be subtracted
|
||||
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
|
||||
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
|
||||
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
|
||||
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
|
||||
default: RoundAdd = (`FLEN+1)'(0);
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
|
||||
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
|
||||
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
|
||||
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
assign NormSumTruncated = NormSum[`NF+1:2];
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
assign ResultExp = FullResultExp[`NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
module fmaflags(
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XZeroM, YZeroM, // inputs are zero
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
|
||||
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
output logic Invalid, Overflow, Underflow, // flags used to select the result
|
||||
output logic [4:0] FMAFlgM // FMA flags
|
||||
);
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic GtMaxExp; // is exponent greater than the maximum
|
||||
logic UnderflowFlag, Inexact; // flags
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
|
||||
assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't output
|
||||
if (`FPSIZES == 1) begin
|
||||
assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
`FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
|
||||
`FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
|
||||
default: GtMaxExp = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
|
||||
2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
|
||||
2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
|
||||
2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
|
||||
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// exp is negative | result is denorm | exp was denorm but rounded to norm, and if given an unbounded exponent it would stay denormal
|
||||
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't output
|
||||
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result was rounded up to a normal number
|
||||
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
module resultselect(
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode: 000 = round to nearest, ties to even; 001 = round towards zero; 010 = round down; 011 = round up; 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic ResultSgn, // the result's sign
|
||||
input logic CalcPlus1, // rounding bits
|
||||
input logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
input logic Invalid, Overflow, Underflow, // flags
|
||||
input logic ResultDenorm, // is the result denormalized
|
||||
input logic [`NE-1:0] ResultExp, // Result exponent
|
||||
input logic [`NF-1:0] ResultFrac, // Result fraction
|
||||
output logic [`FLEN-1:0] FMAResM // FMA final result
|
||||
);
|
||||
logic InfSgn;
|
||||
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
|
||||
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
|
||||
if (`FPSIZES == 1) begin
|
||||
if(`IEEE754) begin
|
||||
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
|
||||
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
|
||||
if(`IEEE754) begin
|
||||
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
|
||||
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
|
||||
YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
|
||||
ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
|
||||
InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
|
||||
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = (`FLEN)'(0);
|
||||
YNaNResult = (`FLEN)'(0);
|
||||
ZNaNResult = (`FLEN)'(0);
|
||||
InvalidResult = (`FLEN)'(0);
|
||||
end else begin
|
||||
XNaNResult = (`FLEN)'(0);
|
||||
end
|
||||
OverflowResult = (`FLEN)'(0);
|
||||
KillProdResult = (`FLEN)'(0);
|
||||
UnderflowResult = (`FLEN)'(0);
|
||||
InfResult = (`FLEN)'(0);
|
||||
NormResult = (`FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
|
||||
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
|
||||
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
|
||||
NormResult = {ResultSgn, ResultExp, ResultFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
|
||||
YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
|
||||
ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
|
||||
InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
|
||||
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
|
||||
YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
|
||||
ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
|
||||
InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
|
||||
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
|
||||
YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
|
||||
ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
|
||||
InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end else begin
|
||||
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
|
||||
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
|
||||
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
if(`IEEE754) begin
|
||||
assign FMAResM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult :
|
||||
XInfM|YInfM|ZInfM ? InfResult :
|
||||
KillProdM ? KillProdResult :
|
||||
Overflow ? OverflowResult :
|
||||
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
|
||||
NormResult;
|
||||
end else begin
|
||||
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
|
||||
XInfM|YInfM|ZInfM ? InfResult :
|
||||
KillProdM ? KillProdResult :
|
||||
Overflow ? OverflowResult :
|
||||
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
|
||||
NormResult;
|
||||
end
|
||||
|
||||
endmodule
|
||||
127
pipelined/src/fpu/fmashiftcalc.sv
Normal file
127
pipelined/src/fpu/fmashiftcalc.sv
Normal file
@ -0,0 +1,127 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// fmashiftcalc: prepares the normalization shift for the FMA sum.
// - SumZero: flags an all-zero sum
// - ConvNormSumExp: exponent of the normalized sum, re-biased for the format selected by FmtM
// - PreResultDenorm: early (pre-LZA-correction) prediction that the result is denormalized
// - FmaShiftIn/FmaShiftAmt: the value to shift and the shift amount
module fmashiftcalc(
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic [`NE-1:0] ZExpM, // exponent of Z
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count
|
||||
input logic [`FMTBITS-1:0] FmtM, // selects the floating-point format (encoding depends on `FPSIZES, see the case statements below)
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic ZDenormM, // is Z denormalized
|
||||
output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
output logic SumZero, // is the sum zero
|
||||
output logic PreResultDenorm, // is the result denormalized - calculated before LZA correction
|
||||
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // shift amount: normalization shift count plus DenormShift
|
||||
output logic [3*`NF+8:0] FmaShiftIn // value to be shifted: SumM zero-extended by three bits
|
||||
);
|
||||
logic [$clog2(3*`NF+7)-1:0] DenormShift; // extra shift added to FmaNormCntM: low bits of ConvNormSumExp if the result is predicted denormal, otherwise 1 //***change this later
|
||||
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias
|
||||
|
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//*** insert bias-bias simplification in fcvt.sv/phone pictures
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|SumM);
|
||||
|
|
||||
// calculate the sum's exponent
|
||||
// - product killed: use Z's exponent, with the LSB cleared when Z is denormalized
|
||||
// - otherwise: ProdExpM - FmaNormCntM - 1 + (`NF+4)
|
||||
assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
|
||||
|
|
||||
// convert the sum's exponent into the proper precision
|
||||
// - smaller formats: subtract the `FLEN bias and add the selected format's bias
|
||||
// - the converted exponent is forced to zero when NormSumExp is zero
|
||||
if (`FPSIZES == 1) begin
|
||||
assign ConvNormSumExp = NormSumExp;
|
||||
|
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
|
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: ConvNormSumExp = NormSumExp;
|
||||
`FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
|
||||
`FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
|
||||
default: ConvNormSumExp = {`NE+2{1'bx}};
|
||||
endcase
|
||||
end
|
||||
|
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: ConvNormSumExp = NormSumExp;
|
||||
2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
|
||||
2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
|
||||
endcase
|
||||
end
|
||||
|
|
||||
end
|
||||
|
|
||||
// determine if the result is denormalized
|
||||
// - SumNLEZ: the sum's exponent, once re-biased for format N, is less than or equal to zero
|
||||
// - SumNGEFL: the re-biased exponent is greater than or equal to -(fraction length of format N + 2)
|
||||
// (for the smaller formats this flag is also set when NormSumExp is zero)
|
||||
// - the result is predicted denormal when both flags of the selected format hold and the sum is not zero
|
||||
|
|
||||
if (`FPSIZES == 1) begin
|
||||
logic Sum0LEZ, Sum0GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
|
|
||||
end else if (`FPSIZES == 2) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
|
||||
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
|
||||
|
|
||||
end else if (`FPSIZES == 3) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
|
||||
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
|
||||
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
default: PreResultDenorm = 1'bx;
|
||||
endcase
|
||||
end
|
||||
|
|
||||
end else if (`FPSIZES == 4) begin
|
||||
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
|
||||
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
|
||||
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
|
||||
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
|
||||
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
|
||||
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
|
||||
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
|
||||
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
|
||||
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
|
||||
always_comb begin
|
||||
case (FmtM)
|
||||
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
|
||||
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
|
||||
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
|
||||
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
|
||||
endcase
|
||||
end
|
||||
|
|
||||
end
|
||||
|
|
||||
// 010. when should be 001.
|
||||
// - shift left one
|
||||
// - add one from exp
|
||||
// - if kill prod don't add to exp
|
||||
|
|
||||
// Determine if the result is denormal
|
||||
// assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
|
||||
// Determine the shift needed for denormal results
|
||||
// - if not denorm add 1 to shift out the leading 1
|
||||
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
|
||||
// set and calculate the shift input and amount
|
||||
assign FmaShiftIn = {3'b0, SumM};
|
||||
assign FmaShiftAmt = FmaNormCntM+DenormShift;
|
||||
endmodule
|
||||
@ -45,17 +45,13 @@ module fpu (
|
||||
output logic FWriteIntE, // integer register write enables
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
|
||||
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
|
||||
output logic [1:0] FResSelW,
|
||||
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
|
||||
);
|
||||
|
||||
//*** make everything FLEN at some point
|
||||
//*** add the 128 bit support to the if statement when needed
|
||||
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
|
||||
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
|
||||
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
|
||||
|
||||
// FPU specifics:
|
||||
// - uses NaN-blocking format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
@ -68,24 +64,24 @@ module fpu (
|
||||
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
|
||||
logic FDivStartD, FDivStartE; // Start division or squareroot
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic FWriteIntM; // Write to integer register
|
||||
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
|
||||
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
|
||||
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
|
||||
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
|
||||
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
|
||||
|
||||
// regfile signals
|
||||
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
|
||||
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM, YSgnM; // input's sign - memory stage
|
||||
logic XSgnM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
|
||||
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
@ -95,7 +91,7 @@ module fpu (
|
||||
logic XNaNQ, YNaNQ; // is the input a NaN - divide
|
||||
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
|
||||
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
|
||||
logic XDenormE, ZDenormE; // is the input denormalized
|
||||
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
|
||||
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
|
||||
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
|
||||
logic XZeroQ, YZeroQ; // is the input zero - divide
|
||||
@ -104,24 +100,43 @@ module fpu (
|
||||
logic XInfQ, YInfQ; // is the input infinity - divide
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic FmtQ;
|
||||
logic FOpCtrlQ;
|
||||
logic FOpCtrlQ;
|
||||
|
||||
// Fma Signals
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
|
||||
logic CvtResSgnE, CvtResSgnM; // the result's sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
logic [4:0] FDivFlgM; // divide/squareroot flags
|
||||
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
|
||||
logic [4:0] FMAFlgM; // FMA/multiply result
|
||||
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
|
||||
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
|
||||
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
|
||||
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
|
||||
logic [`XLEN-1:0] ClassResE; // classify result
|
||||
logic [`FLEN-1:0] CmpResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`XLEN-1:0] FIntResE; // classify result
|
||||
logic [`FLEN-1:0] FpResM, FpResW; // classify result
|
||||
logic [`FLEN-1:0] PostProcResM; // classify result
|
||||
logic [4:0] PostProcFlgM; // classify result
|
||||
logic [`XLEN-1:0] FCvtIntResM;
|
||||
logic [`FLEN-1:0] CmpFpResE; // compare result
|
||||
logic [`XLEN-1:0] CmpIntResE; // compare result
|
||||
logic CmpNVE; // compare invalid flag (Not Valid)
|
||||
logic [`FLEN-1:0] SgnResE; // sign injection result
|
||||
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
|
||||
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
|
||||
logic [`XLEN-1:0] FIntResE;
|
||||
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
|
||||
// other signals
|
||||
logic FDivSqrtDoneE; // is divide done
|
||||
@ -133,10 +148,20 @@ module fpu (
|
||||
|
||||
// DECODE STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// |||||||||||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// |||||||||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate FP control signals
|
||||
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
|
||||
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
|
||||
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
|
||||
.FmtD, .FrmD, .FWriteIntD);
|
||||
|
||||
// FP register file
|
||||
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
|
||||
@ -150,20 +175,31 @@ module fpu (
|
||||
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
|
||||
|
||||
// EXECUTION STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||||||||||||
|
||||
// |||
|
||||
// |||
|
||||
// |||||||||
|
||||
// |||
|
||||
// |||
|
||||
// ||||||||||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Hazard unit for FPU
|
||||
// - determines if any forwarding or stalls are needed
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
|
||||
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
|
||||
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
|
||||
|
||||
// forwarding muxs
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
|
||||
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
|
||||
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
|
||||
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
|
||||
|
||||
|
||||
generate
|
||||
@ -178,7 +214,7 @@ module fpu (
|
||||
endgenerate
|
||||
|
||||
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
|
||||
|
||||
// Force Z to be 0 for multiply instructions
|
||||
generate
|
||||
@ -201,21 +237,12 @@ module fpu (
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
|
||||
|
||||
// FMA
|
||||
// - two stage FMA
|
||||
// - execute stage - multiplication and addend shifting
|
||||
// - memory stage - addition and rounding
|
||||
// - handles FMA and multiply instructions
|
||||
fma fma (.clk, .reset, .FlushM, .StallM,
|
||||
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
|
||||
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
|
||||
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
|
||||
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
|
||||
.FOpCtrlE,
|
||||
.FmtE, .FmtM, .FrmM,
|
||||
.FMAFlgM, .FMAResM);
|
||||
|
||||
// fma - does multiply, add, and multiply-add instructions
|
||||
fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
|
||||
.XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE,
|
||||
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE,
|
||||
.ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
// fpdivsqrt using Goldschmidt's iteration
|
||||
if(`FLEN == 64) begin
|
||||
@ -245,11 +272,14 @@ module fpu (
|
||||
|
||||
// other FP execution units
|
||||
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
|
||||
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
|
||||
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
|
||||
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
|
||||
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
|
||||
|
||||
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE,
|
||||
.FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE,
|
||||
.CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE,
|
||||
.CvtLzcInE);
|
||||
|
||||
// data to be stored in memory - to IEU
|
||||
// - FP uses NaN-blocking format
|
||||
@ -269,16 +299,16 @@ module fpu (
|
||||
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
|
||||
endgenerate
|
||||
// select a result that may be written to the FP register
|
||||
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
|
||||
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
|
||||
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
|
||||
|
||||
// select the result that may be written to the integer register - to IEU
|
||||
if (`FLEN>`XLEN)
|
||||
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
assign IntSrcXE = FSrcXE[`XLEN-1:0];
|
||||
else
|
||||
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
|
||||
CvtIntResE, FIntResSelE, FIntResE);
|
||||
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
|
||||
|
||||
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
|
||||
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
|
||||
// *** make sure the fpu matches the chapter diagram
|
||||
|
||||
@ -286,33 +316,68 @@ module fpu (
|
||||
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
|
||||
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
|
||||
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResultSelE, FrmE, FmtE},
|
||||
{FRegWriteM, FResultSelM, FrmM, FmtM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
|
||||
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
|
||||
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
|
||||
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
|
||||
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
|
||||
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
|
||||
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
|
||||
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
|
||||
|
||||
// BEGIN MEMORY STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||| |||
|
||||
// |||||| ||||||
|
||||
// ||| ||| ||| |||
|
||||
// ||| ||||| |||
|
||||
// ||| ||| |||
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
|
||||
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
|
||||
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
|
||||
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
|
||||
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
|
||||
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
|
||||
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
|
||||
// M/W pipe registers
|
||||
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
|
||||
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
|
||||
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
|
||||
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
|
||||
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResultSelM, FmtM},
|
||||
{FRegWriteW, FResultSelW, FmtW});
|
||||
{FRegWriteM, FResSelM, FmtM},
|
||||
{FRegWriteW, FResSelW, FmtW});
|
||||
|
||||
// BEGIN WRITEBACK STAGE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ||| |||
|
||||
// ||| |||
|
||||
// ||| ||| |||
|
||||
// ||| ||||| |||
|
||||
// ||| ||| ||| |||
|
||||
// |||||| ||||||
|
||||
// ||| |||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// put ReadData into NaN-blocking format
|
||||
// - if there are any unused bits the most significant bits are filled with 1s
|
||||
// - for load instruction
|
||||
@ -328,6 +393,6 @@ module fpu (
|
||||
endgenerate
|
||||
|
||||
// select the result to be written to the FP register
|
||||
if(`FLEN>=64)
|
||||
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
|
||||
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
|
||||
|
||||
endmodule // fpu
|
||||
|
||||
@ -46,7 +46,7 @@ module fsgninj (
|
||||
//
|
||||
|
||||
// calculate the result's sign
|
||||
assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
|
||||
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
|
||||
|
||||
// format final result based on precision
|
||||
// - uses NaN-blocking format
|
||||
@ -64,7 +64,7 @@ module fsgninj (
|
||||
`FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
|
||||
`FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
|
||||
`FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]};
|
||||
default: SgnResE = 0;
|
||||
default: SgnResE = {`FLEN{1'bx}};
|
||||
endcase
|
||||
|
||||
else if (`FPSIZES == 4)
|
||||
|
||||
29
pipelined/src/fpu/lzacorrection.sv
Normal file
29
pipelined/src/fpu/lzacorrection.sv
Normal file
@ -0,0 +1,29 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// lzacorrection: corrects the normalized sum and its exponent after the normalization shift.
// The top two bits of Shifted indicate whether the shift was short by one or two places;
// the mantissa is realigned and the exponent is adjusted to match.
module lzacorrection(
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
input logic FmaOp, // 1: output the LZA-corrected sum; 0: pass the top `CORRSHIFTSZ bits of Shifted through unchanged
|
||||
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
|
||||
input logic PreResultDenorm, // is the result denormalized - calculated before LZA correction
|
||||
input logic KillProdM, // is the product set to zero
|
||||
input logic SumZero, // is the sum zero (forces SumExp to zero)
|
||||
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum after LZA correction
|
||||
output logic [`NE+1:0] SumExp // exponent of the normalized sum
|
||||
);
|
||||
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic ResDenorm; // is the result denormalized
|
||||
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
|
||||
|
|
||||
// LZA correction
|
||||
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
|
||||
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
|
||||
// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
|
||||
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
|
||||
// FMA: corrected sum, zero-padded on the right up to `CORRSHIFTSZ bits; otherwise the upper `CORRSHIFTSZ bits of Shifted
|
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
|
||||
// Determine sum's exponent
|
||||
// terms added to ConvNormSumExp (each one suppressed when KillProdM is set):
// +1 if LZAPlus1, +2 if LZAPlus2, +1 if predicted denormal but actually normal,
// +1 if ConvNormSumExp is all ones and Shifted[3*`NF+6] is set
// the whole exponent is forced to zero when the sum is zero or the result is denormalized
|
||||
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
|
||||
// recalculate if the result is denormalized
// - still denormal only if it was predicted denormal and neither Shifted[`NORMSHIFTSZ-2] nor Shifted[`NORMSHIFTSZ-3] is set
|
||||
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
|
||||
|
|
||||
endmodule
|
||||
46
pipelined/src/fpu/normshift.sv
Normal file
46
pipelined/src/fpu/normshift.sv
Normal file
@ -0,0 +1,46 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
|
||||
// convert shift
|
||||
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
|
||||
// process:
|
||||
// - start - CalcExp = 1 + XExp - Largest Bias
|
||||
// | `XLEN zeros | Mantissa | 0's if necessary |
|
||||
//
|
||||
// - shift left 1 (1)
|
||||
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
|
||||
// . <- binary point
|
||||
//
|
||||
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
|
||||
// | 0's | Mantissa | 0's if necessary |
|
||||
// | keep |
|
||||
//
|
||||
// fp -> fp:
|
||||
// - if result is denormalized or underflowed:
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1
|
||||
// process:
|
||||
// - start
|
||||
// | mantissa | 0's |
|
||||
//
|
||||
// - shift right by NF-1 (NF-1)
|
||||
// | `NF-1 zeros | mantissa | 0's |
|
||||
//
|
||||
// - shift left by CalcExp = XExp - Largest bias + new bias
|
||||
// | 0's | mantissa | 0's |
|
||||
// | keep |
|
||||
//
|
||||
// - if the input is denormalized:
|
||||
// | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
//
|
||||
// int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1
|
||||
// - plus 1 to shift out the first 1
|
||||
|
||||
// normshift: logical left shifter used for normalization.
// Shifted has the same width as ShiftIn, so bits shifted past the MSB are discarded.
module normshift(
|
||||
input logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt, // normalization shift count (number of bit positions to shift left)
|
||||
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // the value to be shifted
|
||||
output logic [`NORMSHIFTSZ-1:0] Shifted // ShiftIn shifted left by ShiftAmt
|
||||
);
|
||||
assign Shifted = ShiftIn << ShiftAmt;
|
||||
|
|
||||
endmodule
|
||||
203
pipelined/src/fpu/postprocess.sv
Normal file
203
pipelined/src/fpu/postprocess.sv
Normal file
@ -0,0 +1,203 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: Katherine Parry, David Harris
|
||||
// Modified: 6/23/2021
|
||||
//
|
||||
// Purpose: Floating-point post-processing unit shared by the FMA and convert paths
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess(
|
||||
input logic XSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
|
||||
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
|
||||
input logic [3*`NF+5:0] SumM, // the positive sum
|
||||
input logic NegSumM, // was the sum negitive
|
||||
input logic InvZM, // do you invert Z
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZSgnEffM, // the modified Z sign - depends on instruction
|
||||
input logic PSgnM, // the product's sign
|
||||
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic CvtResDenormUfM,
|
||||
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
|
||||
input logic CvtResSgnM, // the result's sign
|
||||
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic IntZeroM, // is the input zero
|
||||
input logic [1:0] PostProcSelM, // select result to be written to fp register
|
||||
output logic [`FLEN-1:0] PostProcResM, // FMA final result
|
||||
output logic [4:0] PostProcFlgM,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
|
||||
);
|
||||
|
||||
|
||||
|
||||
logic [`NF-1:0] ResFrac; // Result fraction
|
||||
logic [`NE-1:0] ResExp; // Result exponent
|
||||
logic [`CORRSHIFTSZ-1:0] CorrShifted; // shifted sum coming out of lzacorrection, input to round
|
||||
logic [`NE+1:0] SumExp; // exponent of the normalized sum
|
||||
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
|
||||
logic SumZero; // is the sum zero
|
||||
logic Sticky; // Sticky bit
|
||||
logic [3*`NF+8:0] FmaShiftIn; // FMA value to be shifted (from fmashiftcalc)
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Round; // bits needed to determine rounding
|
||||
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic Mult; // multiply operation
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA correction
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // value fed to the normalization shifter (normshift)
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
logic IntInvalid, Overflow, Underflow, Invalid; // flags
|
||||
logic Signed; // is the operation with a signed integer?
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the operation an int->fp conversion?
|
||||
logic ToInt; // is the operation an fp->int conversion?
|
||||
logic [`NE+1:0] RoundExp; // exponent output of round, passed on to flags
|
||||
logic [1:0] NegResMSBS; // upper bits of the integer conversion result (from resultselect), passed on to flags
|
||||
logic CvtOp; // PostProcSelM == 2'b00: conversion result is being post-processed
|
||||
logic FmaOp; // PostProcSelM == 2'b10: FMA result is being post-processed
|
||||
logic CvtResUf; // from cvtshiftcalc; presumably "conversion result underflowed" - confirm
|
||||
logic DivOp; // PostProcSelM == 2'b01: divide result is being post-processed
|
||||
logic InfIn; // an infinite operand is used by the selected operation
|
||||
logic ResSgn; // sign of the result (from resultsign)
|
||||
logic NaNIn; // a NaN operand is used by the selected operation
|
||||
logic UfLSBRes; // rounding-related bit output by round, passed on to flags
|
||||
logic Sqrt; // FOpCtrlM[0]; passed on to flags
|
||||
logic [`FMTBITS-1:0] OutFmt; // format (precision) of the output
|
||||
|
||||
// signals to help readability
|
||||
assign Signed = FOpCtrlM[0];
|
||||
assign Int64 = FOpCtrlM[1];
|
||||
assign IntToFp = FOpCtrlM[2];
|
||||
assign ToInt = FWriteIntM;
|
||||
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
|
||||
assign CvtOp = (PostProcSelM == 2'b00);
|
||||
assign FmaOp = (PostProcSelM == 2'b10);
|
||||
assign DivOp = (PostProcSelM == 2'b01);
|
||||
assign Sqrt = FOpCtrlM[0];
|
||||
|
||||
// is there an input of infinity or NaN being used
|
||||
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
|
||||
assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - otherwise: FmtM contains the percision of the output
|
||||
if (`FPSIZES == 2)
|
||||
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
|
||||
else if (`FPSIZES == 3 | `FPSIZES == 4)
|
||||
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
|
||||
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
|
||||
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
|
||||
// Select the shift amount and the value to shift for the shared normalization shifter,
// based on which unit's result is being post-processed (PostProcSelM).
// Narrower shift amounts are zero-extended on the left; narrower shift inputs are
// left-aligned and padded with zeros on the right so both match `NORMSHIFTSZ.
always_comb
|
||||
case(PostProcSelM)
|
||||
2'b10: begin // fma
|
||||
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
|
||||
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
|
||||
end
|
||||
2'b00: begin // cvt
|
||||
// NOTE(review): the padding assumes CvtShiftAmtM (`LOGLGLEN bits) is $clog2(`LGLEN+1) bits wide - confirm
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
|
||||
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
|
||||
end
|
||||
2'b01: begin //div
|
||||
// divide drives zeros for now; the intended connections are left in the trailing comments
ShiftAmt = 0;//{DivShiftAmt};
|
||||
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
|
||||
end
|
||||
default: begin
|
||||
// unused select value: drive X on both shifter inputs
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
|
||||
ShiftIn = {`NORMSHIFTSZ{1'bx}};
|
||||
end
|
||||
endcase
|
||||
|
||||
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
|
||||
.SumZero, .Shifted, .SumExp, .CorrShifted);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
|
||||
.InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
|
||||
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
|
||||
.ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
|
||||
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
|
||||
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid,
|
||||
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
|
||||
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
|
||||
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
|
||||
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
|
||||
.FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
|
||||
|
||||
endmodule
|
||||
282
pipelined/src/fpu/resultselect.sv
Normal file
282
pipelined/src/fpu/resultselect.sv
Normal file
@ -0,0 +1,282 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module resultselect(
|
||||
input logic XSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn,
|
||||
input logic XZeroM,
|
||||
input logic IntZeroM,
|
||||
input logic NaNIn,
|
||||
input logic IntToFp,
|
||||
input logic Int64,
|
||||
input logic Signed,
|
||||
input logic CvtOp,
|
||||
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
|
||||
input logic FmaOp,
|
||||
input logic Plus1,
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
input logic ZDenormM, // is the original precision denormalized
|
||||
input logic ZZeroM,
|
||||
input logic ResSgn, // the res's sign
|
||||
input logic [`FLEN:0] RoundAdd, // how much to add to the res
|
||||
input logic IntInvalid, Invalid, Overflow, // flags
|
||||
input logic CvtResUf,
|
||||
input logic [`NE-1:0] ResExp, // Res exponent
|
||||
input logic [`NE+1:0] FullResExp, // Res exponent
|
||||
input logic [`NF-1:0] ResFrac, // Res fraction
|
||||
output logic [`FLEN-1:0] PostProcResM, // final res
|
||||
output logic [1:0] NegResMSBS,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // final res
|
||||
);
|
||||
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
|
||||
logic OfResMax;
|
||||
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
|
||||
logic [`XLEN+1:0] NegRes; // the negation of the result
|
||||
logic KillRes;
|
||||
|
||||
|
||||
// does the overflow result output the maximum normalized floating point number
|
||||
// output infinity if the input is infinity
|
||||
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
|
||||
|
||||
if (`FPSIZES == 1) begin
|
||||
|
||||
//NaN res selection depending on standard
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
assign NormRes = {ResSgn, ResExp, ResFrac};
|
||||
|
||||
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
|
||||
if(`IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
|
||||
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
|
||||
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
`FMT1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
|
||||
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
|
||||
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
|
||||
end
|
||||
`FMT2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
|
||||
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
|
||||
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
|
||||
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = (`FLEN)'(0);
|
||||
YNaNRes = (`FLEN)'(0);
|
||||
ZNaNRes = (`FLEN)'(0);
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
end else begin
|
||||
InvalidRes = (`FLEN)'(0);
|
||||
end
|
||||
OfRes = (`FLEN)'(0);
|
||||
KillProdRes = (`FLEN)'(0);
|
||||
UfRes = (`FLEN)'(0);
|
||||
NormRes = (`FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
|
||||
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
|
||||
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
|
||||
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
|
||||
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
|
||||
NormRes = {ResSgn, ResExp, ResFrac};
|
||||
end
|
||||
2'h1: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
|
||||
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
|
||||
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
|
||||
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
|
||||
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
|
||||
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(`IEEE754) begin
|
||||
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
|
||||
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
|
||||
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
|
||||
|
||||
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
|
||||
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
|
||||
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
|
||||
end
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// determine if you should kill the result - Cvt
|
||||
// - do so if the result underflows, the fp input is zero (the exponent doesn't calculate correctly), or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
|
||||
|
||||
if(`IEEE754) begin
|
||||
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
|
||||
YNaNM&~CvtOp ? YNaNRes :
|
||||
ZNaNM&FmaOp ? ZNaNRes :
|
||||
Invalid ? InvalidRes :
|
||||
Overflow|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end else begin
|
||||
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
|
||||
Overflow|InfIn ? OfRes :
|
||||
KillProdM&FmaOp ? KillProdRes :
|
||||
KillRes ? UfRes :
|
||||
NormRes;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ||||||||||| ||| ||| |||||||||||||
|
||||
// ||| |||||| ||| |||
|
||||
// ||| ||| ||| ||| |||
|
||||
// ||| ||| |||||| |||
|
||||
// ||||||||||| ||| ||| |||
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// *** probably can optimize the negation
|
||||
// select the overflow integer result
|
||||
// - negative infinity and out of range negative input
|
||||
// | int | long |
|
||||
// signed | -2^31 | -2^63 |
|
||||
// unsigned | 0 | 0 |
|
||||
//
|
||||
// - positive infinity and out of range positive input and NaNs
|
||||
// | int | long |
|
||||
// signed | 2^31-1 | 2^63-1 |
|
||||
// unsigned | 2^32-1 | 2^64-1 |
|
||||
//
|
||||
// other: 32 bit unsigned result should be sign extended as if it were a signed number
|
||||
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
|
||||
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
|
||||
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negative
|
||||
{`XLEN{1'b1}};// unsigned positive
|
||||
|
|
||||
// round and negate the positive result if needed
// - takes the top `XLEN bits of Shifted, adds Plus1, and negates when XSgnM is set
// - NegRes is `XLEN+2 bits wide, so two bits above the `XLEN-bit result are kept
|
||||
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
|
||||
|
|
||||
//*** false critical path probably
// pick two bits of NegRes around the top of the selected integer width (signed/unsigned, 32/64 bit)
|
||||
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
|
||||
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
|
||||
|
|
||||
// select the integer output
|
||||
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
|
||||
// - if the input underflows
|
||||
// - if rounding and signed operation and negative input, output -1
|
||||
// - otherwise output a rounded 0
|
||||
// - otherwise output the normal result (trimmed and sign extended if necessary)
|
||||
assign FCvtIntResM = IntInvalid ? OfIntRes :
|
||||
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
|
||||
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
|
||||
endmodule
|
||||
50
pipelined/src/fpu/resultsign.sv
Normal file
50
pipelined/src/fpu/resultsign.sv
Normal file
@ -0,0 +1,50 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// resultsign: picks the sign bit of the post-processed result.
//   PostProcSelM == 2'b10 (fma)    : sign computed here from the product/addend signs
//   PostProcSelM == 2'b00 (cvt)    : sign supplied by the conversion unit (CvtResSgnM)
//   PostProcSelM == 2'b01 (divide) : constant 0
//   anything else                  : X
module resultsign(
    input  logic [2:0]      FrmM,           // rounding mode
    input  logic            PSgnM, ZSgnEffM,// product sign, effective addend sign
    input  logic            InvZM,          // addend is inverted
    input  logic            ZInfM,          // addend is infinity
    input  logic            InfIn,          // an infinite operand is in use
    input  logic            NegSumM,        // the sum was negative
    input  logic [1:0]      PostProcSelM,   // which unit's result is being post-processed
    input  logic [`NE+1:0]  SumExp,         // exponent of the normalized sum
    input  logic            SumZero,        // the sum is zero
    input  logic            Mult,           // multiply operation
    input  logic            Round,          // round bit
    input  logic            Sticky,         // sticky bit
    input  logic            CvtResSgnM,     // sign of the conversion result
    output logic            ResSgn          // sign of the selected result
);

    logic ExpUnderflow;     // sum exponent is negative, or zero with nonzero round/sticky
    logic UseRoundModeSgn;  // zero-sum sign comes from the rounding mode rather than the product
    logic SgnIfZero;        // FMA sign when the sum is zero
    logic SgnIfInf;         // FMA sign when an infinity is involved
    logic SumSgn;           // FMA sign of a nonzero, finite sum
    logic SgnIfFinite;      // FMA sign when no infinity is involved
    logic FmaSgn;           // final FMA sign

    // Sign of a zero sum:
    //  - signs of product and addend differ, no underflow, not a multiply:
    //    1 only when FrmM[1:0] is 2'b10, otherwise 0
    //  - in every other case: the product's sign
    assign ExpUnderflow    = SumExp[`NE+1] | ((Round | Sticky) & (SumExp == 0));
    assign UseRoundModeSgn = (PSgnM ^ ZSgnEffM) & ~ExpUnderflow & ~Mult;
    assign SgnIfZero       = UseRoundModeSgn ? FrmM[1:0] == 2'b10 : PSgnM;

    // Sign of a nonzero sum:
    //  - both signs negative: negative
    //  - addend inverted: the addend's sign if the sum came out negative, else the product's sign
    assign SumSgn = (InvZM & ((ZSgnEffM & NegSumM) | (PSgnM & ~NegSumM))) | (PSgnM & ZSgnEffM);

    // An infinite addend dictates the sign; otherwise the product does
    assign SgnIfInf = ZInfM ? ZSgnEffM : PSgnM;

    // Priority: infinity, then zero sum, then the ordinary sum sign
    assign SgnIfFinite = SumZero ? SgnIfZero : SumSgn;
    assign FmaSgn      = InfIn ? SgnIfInf : SgnIfFinite;

    always_comb
        case (PostProcSelM)
            2'b00:   ResSgn = CvtResSgnM; // cvt
            2'b01:   ResSgn = 0;          // divide
            2'b10:   ResSgn = FmaSgn;     // fma
            default: ResSgn = 1'bx;
        endcase

endmodule
|
||||
316
pipelined/src/fpu/round.sv
Normal file
316
pipelined/src/fpu/round.sv
Normal file
@ -0,0 +1,316 @@
|
||||
`include "wally-config.vh"
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
|
||||
|
||||
module round(
|
||||
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic FmaOp,
|
||||
input logic [1:0] PostProcSelM,
|
||||
input logic CvtResDenormUfM,
|
||||
input logic ToInt,
|
||||
input logic CvtOp,
|
||||
input logic CvtResUf,
|
||||
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
|
||||
input logic AddendStickyM, // addend's sticky bit
|
||||
input logic ZZeroM, // is Z zero
|
||||
input logic InvZM, // invert Z
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic ResSgn, // the result's sign
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
output logic UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResFrac, // Result fraction
|
||||
output logic [`NE-1:0] ResExp, // Result exponent
|
||||
output logic Sticky, // sticky bit
|
||||
output logic [`NE+1:0] RoundExp,
|
||||
output logic Plus1,
|
||||
output logic [`FLEN:0] RoundAdd, // how much to add to the result
|
||||
output logic Round, UfLSBRes // bits needed to calculate rounding
|
||||
);
|
||||
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
|
||||
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
|
||||
logic NormSumSticky; // normalized sum's sticky bit
|
||||
logic UfSticky; // sticky bit for underlow calculation
|
||||
logic [`NF-1:0] RoundFrac;
|
||||
logic FpRes, IntRes;
|
||||
logic UfRound;
|
||||
logic FpRound, FpLSBRes, FpUfRound;
|
||||
logic CalcPlus1, FpPlus1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - plus 1 otherwise
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - Plus 1 otherwise
|
||||
|
||||
assign IntRes = CvtOp & ToInt;
|
||||
assign FpRes = ~IntRes;
|
||||
|
||||
// sticky bit calculation
|
||||
if (`FPSIZES == 1) begin
|
||||
|
||||
// 1: XLEN > NF
|
||||
// | XLEN |
|
||||
// | NF |1|1|
|
||||
// ^ ^ if floating point result
|
||||
// ^ if not an FMA result
|
||||
if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN
|
||||
if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// XLEN is either 64 or 32
|
||||
// so half and single are always smaller than XLEN
|
||||
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
// 1: XLEN > NF > NF1
|
||||
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
// Quad precision will always be greater than XLEN
|
||||
// 2: NF > XLEN > NF1
|
||||
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
// The extra XLEN bit will be ORed later when calculating the final sticky bit - the ufplus1 is not needed for integer
|
||||
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
|
||||
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
// only add the Addend sticky if doing an FMA operation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determine the underflow flag
|
||||
if (`FPSIZES == 1) begin
|
||||
assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
|
||||
assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
|
||||
assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
|
||||
assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
|
||||
assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
`FMT: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
|
||||
end
|
||||
`FMT1: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
|
||||
end
|
||||
`FMT2: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpRound = 1'bx;
|
||||
FpLSBRes = 1'bx;
|
||||
FpUfRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
|
||||
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
|
||||
FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
|
||||
assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
|
||||
assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
|
||||
|
||||
// used to determine underflow flag
|
||||
assign UfLSBRes = FpRound;
|
||||
// determine sticky
|
||||
assign Sticky = UfSticky | UfRound;
|
||||
|
||||
|
||||
// Determine if a small number was supposed to be subtracted - For Fma calculation only
|
||||
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
|
||||
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
|
||||
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Round);
|
||||
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
|
||||
assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Round);
|
||||
|
||||
// Compute rounded result
|
||||
if (`FPSIZES == 1) begin
|
||||
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (`FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
|
||||
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
|
||||
end else if (`FPSIZES == 3) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
|
||||
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
|
||||
default: RoundAdd = (`FLEN+1)'(0);
|
||||
endcase
|
||||
end
|
||||
|
||||
end else if (`FPSIZES == 4) begin
|
||||
always_comb begin
|
||||
case (OutFmt)
|
||||
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
|
||||
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
|
||||
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
|
||||
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// determine the result to be rounded
|
||||
assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
2'b10: RoundExp = SumExp; // fma
|
||||
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
|
||||
2'b01: RoundExp = 0; // divide
|
||||
default: RoundExp = 0;
|
||||
endcase
|
||||
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
|
||||
assign ResExp = FullResExp[`NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
||||
@ -98,7 +98,7 @@ module unpackinput (
|
||||
`FMT: BadNaNBox = 0;
|
||||
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
|
||||
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
|
||||
default: BadNaNBox = 0;
|
||||
default: BadNaNBox = 1'bx;
|
||||
endcase
|
||||
|
||||
// extract the sign bit
|
||||
@ -107,7 +107,7 @@ module unpackinput (
|
||||
`FMT: Sgn = In[`FLEN-1];
|
||||
`FMT1: Sgn = In[`LEN1-1];
|
||||
`FMT2: Sgn = In[`LEN2-1];
|
||||
default: Sgn = 0;
|
||||
default: Sgn = 1'bx;
|
||||
endcase
|
||||
|
||||
// extract the fraction
|
||||
@ -116,7 +116,7 @@ module unpackinput (
|
||||
`FMT: Frac = In[`NF-1:0];
|
||||
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
|
||||
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
|
||||
default: Frac = 0;
|
||||
default: Frac = {`NF{1'bx}};
|
||||
endcase
|
||||
|
||||
// is the exponent non-zero
|
||||
@ -125,7 +125,7 @@ module unpackinput (
|
||||
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
|
||||
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precision (`LEN1 - double or single)
|
||||
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precision (`LEN2 - single or half)
|
||||
default: ExpNonZero = 0;
|
||||
default: ExpNonZero = 1'bx;
|
||||
endcase
|
||||
|
||||
// example double to single conversion:
|
||||
@ -142,7 +142,7 @@ module unpackinput (
|
||||
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
|
||||
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
|
||||
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
|
||||
default: Exp = 0;
|
||||
default: Exp = {`NE{1'bx}};
|
||||
endcase
|
||||
|
||||
// is the exponent all 1's
|
||||
@ -151,7 +151,7 @@ module unpackinput (
|
||||
`FMT: ExpMax = &In[`FLEN-2:`NF];
|
||||
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
|
||||
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
|
||||
default: ExpMax = 0;
|
||||
default: ExpMax = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (`FPSIZES == 4) begin // if all precisions are supported - quad, double, single, and half
|
||||
|
||||
@ -61,6 +61,8 @@ module datapath (
|
||||
(* mark_debug = "true" *) input logic RegWriteW,
|
||||
input logic SquashSCW,
|
||||
input logic [2:0] ResultSrcW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
input logic [1:0] FResSelW,
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
|
||||
@ -120,14 +122,17 @@ module datapath (
|
||||
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
|
||||
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
|
||||
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
|
||||
// floating point interactions: fcvt, fp stores
|
||||
if (`F_SUPPORTED) begin:fpmux
|
||||
logic [`XLEN-1:0] IFCvtResultW;
|
||||
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
|
||||
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
|
||||
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end else begin:fpmux
|
||||
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
|
||||
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
|
||||
end
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
|
||||
@ -61,6 +61,8 @@ module ieu (
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
|
||||
input logic [1:0] FResSelW,
|
||||
input logic [`XLEN-1:0] FCvtIntResW,
|
||||
output logic [4:0] RdW,
|
||||
output logic [`XLEN-1:0] ReadDataW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
@ -105,8 +107,8 @@ module ieu (
|
||||
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
|
||||
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
|
||||
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
|
||||
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
|
||||
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
|
||||
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
|
||||
|
||||
forward fw(
|
||||
|
||||
@ -38,9 +38,13 @@ module ifu (
|
||||
// Bus interface
|
||||
(* mark_debug = "true" *) input logic [`XLEN-1:0] IFUBusHRDATA,
|
||||
(* mark_debug = "true" *) input logic IFUBusAck,
|
||||
(* mark_debug = "true" *) input logic IFUBusInit,
|
||||
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUBusAdr,
|
||||
(* mark_debug = "true" *) output logic IFUBusRead,
|
||||
(* mark_debug = "true" *) output logic IFUStallF,
|
||||
(* mark_debug = "true" *) output logic [2:0] IFUBurstType,
|
||||
(* mark_debug = "true" *) output logic [1:0] IFUTransType,
|
||||
(* mark_debug = "true" *) output logic IFUTransComplete,
|
||||
(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF,
|
||||
// Execute
|
||||
output logic [`XLEN-1:0] PCLinkE,
|
||||
@ -201,8 +205,8 @@ module ifu (
|
||||
|
||||
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
busdp(.clk, .reset,
|
||||
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
|
||||
.LSUBusRead(IFUBusRead), .LSUBusSize(),
|
||||
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
|
||||
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
|
||||
.LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
|
||||
.WordCount(),
|
||||
.DCacheFetchLine(ICacheFetchLine),
|
||||
|
||||
@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
// bus interface
|
||||
input logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
input logic LSUBusAck,
|
||||
input logic LSUBusInit,
|
||||
output logic LSUBusWrite,
|
||||
output logic LSUBusRead,
|
||||
output logic [2:0] LSUBusSize,
|
||||
output logic [2:0] LSUBusSize,
|
||||
output logic [2:0] LSUBurstType,
|
||||
output logic [1:0] LSUTransType, // For AHBLite
|
||||
output logic LSUTransComplete,
|
||||
input logic [2:0] LSUFunct3M,
|
||||
output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
|
||||
output logic [LOGWPL-1:0] WordCount,
|
||||
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
|
||||
localparam integer WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
|
||||
logic [`PA_BITS-1:0] LocalLSUBusAdr;
|
||||
logic [LOGWPL-1:0] WordCountDelayed;
|
||||
|
||||
|
||||
// *** implement flops as an array if feasible; DCacheBusWriteData might be a problem
|
||||
// *** better name than DCacheBusWriteData
|
||||
genvar index;
|
||||
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
|
||||
logic [WORDSPERLINE-1:0] CaptureWord;
|
||||
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
|
||||
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
|
||||
flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
|
||||
.q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
|
||||
end
|
||||
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
|
||||
|
||||
busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
|
||||
.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
|
||||
.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
|
||||
.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
|
||||
.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
|
||||
.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
|
||||
endmodule
|
||||
|
||||
@ -41,6 +41,7 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
input logic DCacheFetchLine,
|
||||
input logic DCacheWriteLine,
|
||||
input logic LSUBusAck,
|
||||
input logic LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
|
||||
input logic CPUBusy,
|
||||
input logic CacheableM,
|
||||
|
||||
@ -48,10 +49,13 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
output logic LSUBusWrite,
|
||||
output logic LSUBusWriteCrit,
|
||||
output logic LSUBusRead,
|
||||
output logic [2:0] LSUBurstType,
|
||||
output logic LSUTransComplete,
|
||||
output logic [1:0] LSUTransType,
|
||||
output logic DCacheBusAck,
|
||||
output logic BusCommittedM,
|
||||
output logic SelUncachedAdr,
|
||||
output logic [LOGWPL-1:0] WordCount);
|
||||
output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
|
||||
|
||||
|
||||
|
||||
@ -61,7 +65,8 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
logic CntReset;
|
||||
logic WordCountFlag;
|
||||
logic [LOGWPL-1:0] NextWordCount;
|
||||
logic UnCachedAccess;
|
||||
logic UnCachedAccess, UnCachedRW;
|
||||
logic [2:0] LocalBurstType;
|
||||
|
||||
|
||||
typedef enum logic [2:0] {STATE_BUS_READY,
|
||||
@ -75,18 +80,27 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
|
||||
(* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;
|
||||
|
||||
|
||||
// Used to send address for address stage of AHB.
|
||||
flopenr #(LOGWPL)
|
||||
WordCountReg(.clk(clk),
|
||||
.reset(reset | CntReset),
|
||||
.en(CntEn),
|
||||
.d(NextWordCount),
|
||||
.q(WordCount));
|
||||
.q(WordCount));
|
||||
|
||||
// Used to store data from data phase of AHB.
|
||||
flopenr #(LOGWPL)
|
||||
WordCountDelayedReg(.clk(clk),
|
||||
.reset(reset | CntReset),
|
||||
.en(CntEn),
|
||||
.d(WordCount),
|
||||
.q(WordCountDelayed));
|
||||
|
||||
assign NextWordCount = WordCount + 1'b1;
|
||||
|
||||
assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
|
||||
assign CntEn = PreCntEn & LSUBusAck;
|
||||
assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
|
||||
assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
|
||||
assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.
|
||||
|
||||
assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;
|
||||
|
||||
@ -120,14 +134,29 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case(WordCountThreshold)
|
||||
0: LocalBurstType = 3'b000;
|
||||
3: LocalBurstType = 3'b011; // INCR4
|
||||
7: LocalBurstType = 3'b101; // INCR8
|
||||
15: LocalBurstType = 3'b111; // INCR16
|
||||
default: LocalBurstType = 3'b001; // INCR without end.
|
||||
endcase
|
||||
end
|
||||
|
||||
assign CntReset = BusCurrState == STATE_BUS_READY;
|
||||
// Would these be better as always_comb statements or muxes?
|
||||
assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
|
||||
assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
|
||||
// Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
|
||||
assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00;
|
||||
// Reset if we aren't initiating a transaction or if we are finishing a transaction.
|
||||
assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete;
|
||||
|
||||
assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_READ) |
|
||||
(BusCurrState == STATE_BUS_FETCH) |
|
||||
(BusCurrState == STATE_BUS_WRITE);
|
||||
assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
|
||||
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
|
||||
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
|
||||
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
|
||||
@ -139,6 +168,10 @@ module busfsm #(parameter integer WordCountThreshold,
|
||||
(BusCurrState == STATE_BUS_UNCACHED_READ);
|
||||
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
|
||||
|
||||
|
||||
// Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
|
||||
assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;
|
||||
|
||||
assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
|
||||
(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
|
||||
assign BusCommittedM = BusCurrState != STATE_BUS_READY;
|
||||
|
||||
@ -66,9 +66,13 @@ module lsu (
|
||||
(* mark_debug = "true" *) output logic LSUBusRead,
|
||||
(* mark_debug = "true" *) output logic LSUBusWrite,
|
||||
(* mark_debug = "true" *) input logic LSUBusAck,
|
||||
(* mark_debug = "true" *) input logic LSUBusInit,
|
||||
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
|
||||
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
|
||||
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
|
||||
(* mark_debug = "true" *) output logic [2:0] LSUBurstType,
|
||||
(* mark_debug = "true" *) output logic [1:0] LSUTransType,
|
||||
(* mark_debug = "true" *) output logic LSUTransComplete,
|
||||
// page table walker
|
||||
input logic [`XLEN-1:0] SATP_REGW, // from csr
|
||||
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
|
||||
@ -211,7 +215,7 @@ module lsu (
|
||||
|
||||
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
|
||||
.clk, .reset,
|
||||
.LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
|
||||
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
|
||||
.WordCount, .LSUBusWriteCrit,
|
||||
.LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
|
||||
.DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,
|
||||
|
||||
@ -48,7 +48,7 @@ module gpio (
|
||||
|
||||
logic [31:0] input0d, input1d, input2d, input3d;
|
||||
logic [31:0] input_val, input_en, output_en, output_val;
|
||||
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip;
|
||||
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor;
|
||||
|
||||
logic initTrans, memwrite;
|
||||
logic [7:0] entry, entryd;
|
||||
@ -91,6 +91,7 @@ module gpio (
|
||||
high_ip <= #1 0;
|
||||
low_ie <= #1 0;
|
||||
low_ip <= #1 0;
|
||||
out_xor <= #1 0;
|
||||
end else begin
|
||||
// writes
|
||||
if (memwrite)
|
||||
@ -104,7 +105,7 @@ module gpio (
|
||||
8'h20: fall_ie <= #1 Din;
|
||||
8'h28: high_ie <= #1 Din;
|
||||
8'h30: low_ie <= #1 Din;
|
||||
8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
|
||||
8'h40: out_xor <= #1 Din;
|
||||
endcase
|
||||
/* verilator lint_on CASEINCOMPLETE */
|
||||
// reads
|
||||
@ -121,7 +122,7 @@ module gpio (
|
||||
8'h2C: Dout <= #1 high_ip;
|
||||
8'h30: Dout <= #1 low_ie;
|
||||
8'h34: Dout <= #1 low_ip;
|
||||
8'h40: Dout <= #1 0; // OUT_XOR reads as 0
|
||||
8'h40: Dout <= #1 out_xor;
|
||||
default: Dout <= #1 0;
|
||||
endcase
|
||||
// interrupts
|
||||
@ -152,7 +153,7 @@ module gpio (
|
||||
flop #(32) sync2(HCLK,input1d,input2d);
|
||||
flop #(32) sync3(HCLK,input2d,input3d);
|
||||
assign input_val = input3d;
|
||||
assign GPIOPinsOut = output_val;
|
||||
assign GPIOPinsOut = output_val ^ out_xor;
|
||||
assign GPIOPinsEn = output_en;
|
||||
|
||||
assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};
|
||||
|
||||
@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
|
||||
output logic HRESPRam, HREADYRam
|
||||
);
|
||||
|
||||
// Desired changes.
|
||||
// 1. find a way to merge read and write address into 1 port.
|
||||
// 2. remove all unnecessary latencies. (HREADY needs to be able to stay constantly high.)
|
||||
// 3. implement burst.
|
||||
// 4. remove the configurable latency.
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
logic [`XLEN/8-1:0] ByteMask;
|
||||
logic [31:0] HADDRD, RamAddr;
|
||||
//logic prevHREADYRam, risingHREADYRam;
|
||||
logic initTrans;
|
||||
logic memwrite, memwriteD, memread;
|
||||
logic nextHREADYRam;
|
||||
//logic [3:0] busycount;
|
||||
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
|
||||
|
||||
// a new AHB transaction starts when HTRANS requests a transaction,
|
||||
// the peripheral is selected, and the previous transaction is completing
|
||||
assign initTrans = HREADY & HSELRam & (HTRANS[1]);
|
||||
assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? See CLINT interface
|
||||
assign memwrite = initTrans & HWRITE;
|
||||
assign memread = initTrans & ~HWRITE;
|
||||
|
||||
flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD);
|
||||
flopenr #(32) haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);
|
||||
|
||||
/* // busy FSM to extend READY signal
|
||||
always @(posedge HCLK, negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else begin
|
||||
if (initTrans) begin
|
||||
busycount <= 0;
|
||||
HREADYRam <= #1 0;
|
||||
end else if (~HREADYRam) begin
|
||||
if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2
|
||||
HREADYRam <= #1 1;
|
||||
end else begin
|
||||
busycount <= busycount + 1;
|
||||
end
|
||||
end
|
||||
end */
|
||||
|
||||
|
||||
// Stall on a read after a write because the RAM can't take both addresses on the same cycle
|
||||
assign nextHREADYRam = ~(memwriteD & memread);
|
||||
// assign nextHREADYRam = ~(memwriteD & ~memwrite);
|
||||
flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
|
||||
// assign HREADYRam = ~(memwriteD & ~memwrite);
|
||||
assign HRESPRam = 0; // OK
|
||||
|
||||
localparam ADDR_WIDTH = $clog2(RANGE/8);
|
||||
localparam OFFSET = $clog2(`XLEN/8);
|
||||
|
||||
/* // Rising HREADY edge detector
|
||||
// Indicates when ram is finishing up
|
||||
// Needed because HREADY may go high for other reasons,
|
||||
// and we only want to write data when finishing up.
|
||||
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
|
||||
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
|
||||
|
||||
/*
|
||||
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
|
||||
memory(.clk(HCLK), .reA(1'b1),
|
||||
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
|
||||
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
|
||||
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
|
||||
|
||||
|
||||
|
||||
// On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
|
||||
mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);
|
||||
|
||||
// Byte mask for subword writes
|
||||
// ***the CLINT and other peripherals duplicate this hardware
|
||||
// *** it should be centralized and sent over HWSTRB
|
||||
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
|
||||
|
||||
// single-ported RAM
|
||||
bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
|
||||
memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));
|
||||
|
||||
@ -93,10 +93,12 @@ module wallypipelinedcore (
|
||||
logic FWriteIntE;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic [`XLEN-1:0] FCvtIntResW;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FRegWriteM;
|
||||
logic FPUStallD;
|
||||
logic [1:0] FResSelW;
|
||||
logic [4:0] SetFflagsM;
|
||||
|
||||
// memory management unit signals
|
||||
@ -134,13 +136,16 @@ module wallypipelinedcore (
|
||||
logic [`PA_BITS-1:0] IFUBusAdr;
|
||||
logic [`XLEN-1:0] IFUBusHRDATA;
|
||||
logic IFUBusRead;
|
||||
logic IFUBusAck;
|
||||
logic IFUBusAck, IFUBusInit;
|
||||
logic [2:0] IFUBurstType;
|
||||
logic [1:0] IFUTransType;
|
||||
logic IFUTransComplete;
|
||||
|
||||
// AHB LSU interface
|
||||
logic [`PA_BITS-1:0] LSUBusAdr;
|
||||
logic LSUBusRead;
|
||||
logic LSUBusWrite;
|
||||
logic LSUBusAck;
|
||||
logic LSUBusAck, LSUBusInit;
|
||||
logic [`XLEN-1:0] LSUBusHRDATA;
|
||||
logic [`XLEN-1:0] LSUBusHWDATA;
|
||||
|
||||
@ -152,6 +157,9 @@ module wallypipelinedcore (
|
||||
logic [4:0] InstrClassM;
|
||||
logic InstrAccessFaultF;
|
||||
logic [2:0] LSUBusSize;
|
||||
logic [2:0] LSUBurstType;
|
||||
logic [1:0] LSUTransType;
|
||||
logic LSUTransComplete;
|
||||
|
||||
logic DCacheMiss;
|
||||
logic DCacheAccess;
|
||||
@ -166,8 +174,8 @@ module wallypipelinedcore (
|
||||
.StallF, .StallD, .StallE, .StallM,
|
||||
.FlushF, .FlushD, .FlushE, .FlushM,
|
||||
// Fetch
|
||||
.IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
|
||||
.IFUBusRead, .IFUStallF,
|
||||
.IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
|
||||
.IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
|
||||
.ICacheAccess, .ICacheMiss,
|
||||
|
||||
// Execute
|
||||
@ -224,6 +232,8 @@ module wallypipelinedcore (
|
||||
.CSRReadValW, .ReadDataM, .MDUResultW,
|
||||
.RdW, .ReadDataW,
|
||||
.InstrValidM,
|
||||
.FCvtIntResW,
|
||||
.FResSelW,
|
||||
|
||||
// hazards
|
||||
.StallD, .StallE, .StallM, .StallW,
|
||||
@ -247,8 +257,8 @@ module wallypipelinedcore (
|
||||
.IEUAdrE, .IEUAdrM, .WriteDataE,
|
||||
.ReadDataM, .FlushDCacheM,
|
||||
// connected to ahb (all stay the same)
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
|
||||
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
|
||||
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
|
||||
|
||||
// connect to csr or privilege and stay the same.
|
||||
.PrivilegeModeW, .BigEndianM, // connects to csr
|
||||
@ -279,13 +289,22 @@ module wallypipelinedcore (
|
||||
ahblite ebu(// IFU connections
|
||||
.clk, .reset,
|
||||
.UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
|
||||
.IFUBusAdr,
|
||||
.IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
|
||||
.IFUBusAdr, .IFUBusRead,
|
||||
.IFUBusHRDATA,
|
||||
.IFUBurstType,
|
||||
.IFUTransType,
|
||||
.IFUTransComplete,
|
||||
.IFUBusAck,
|
||||
.IFUBusInit,
|
||||
// Signals from Data Cache
|
||||
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
|
||||
.LSUBusHRDATA,
|
||||
.LSUBusSize,
|
||||
.LSUBurstType,
|
||||
.LSUTransType,
|
||||
.LSUTransComplete,
|
||||
.LSUBusAck,
|
||||
.LSUBusInit,
|
||||
|
||||
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
|
||||
.HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
|
||||
@ -375,6 +394,8 @@ module wallypipelinedcore (
|
||||
.FWriteIntE, // integer register write enable
|
||||
.FWriteDataE, // Data to be written to memory
|
||||
.FIntResM, // data to be written to integer register
|
||||
.FCvtIntResW, // fp -> int conversion result to be stored in int register
|
||||
.FResSelW, // fpu result selection
|
||||
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
|
||||
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
.SetFflagsM // FPU flags (to privileged unit)
|
||||
|
||||
18
pipelined/srt/stine/Makefile
Normal file
18
pipelined/srt/stine/Makefile
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
# Build the srt4div radix-4 SRT division model from disp.c and srt4div.c.
CC     = gcc
# Compile flags only. The optimisation level used to be passed on the link
# line alone, where it had no effect on the generated code.
CFLAGS = -g -O3
# Libraries belong at the end of the link command, not in CFLAGS.
LDLIBS = -lm
OBJS   = disp.o srt4div.o

.PHONY: all clean

all: srt4div

srt4div: $(OBJS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)

disp.o: disp.c disp.h
	$(CC) $(CFLAGS) -c -o $@ $<

# srt4div.c includes disp.h, so it must be rebuilt when the header changes.
srt4div.o: srt4div.c disp.h
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	$(RM) *.o *~
	$(RM) core
|
||||
60
pipelined/srt/stine/disp.c
Executable file
60
pipelined/srt/stine/disp.c
Executable file
@ -0,0 +1,60 @@
|
||||
#include "disp.h"
|
||||
|
||||
/* Round x toward zero at `bits` fractional bits: negative values are
   rounded up with ceiling(), everything else is rounded down with flr(). */
double rnd_zero(double x, double bits) {
  return (x < 0) ? ceiling(x, bits) : flr(x, bits);
}
|
||||
|
||||
double rne(double x, double precision) {
|
||||
double scale, x_round;
|
||||
scale = pow(2.0, precision);
|
||||
x_round = rint(x * scale) / scale;
|
||||
return x_round;
|
||||
}
|
||||
|
||||
double flr(double x, double precision) {
|
||||
double scale, x_round;
|
||||
scale = pow(2.0, precision);
|
||||
x_round = floor(x * scale) / scale;
|
||||
return x_round;
|
||||
}
|
||||
|
||||
double ceiling(double x, double precision) {
|
||||
double scale, x_round;
|
||||
scale = pow(2.0, precision);
|
||||
x_round = ceil(x * scale) / scale;
|
||||
return x_round;
|
||||
}
|
||||
|
||||
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
|
||||
|
||||
double diff;
|
||||
int i;
|
||||
if (fabs(x) < pow(2.0, -bits_to_right)) {
|
||||
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
|
||||
fprintf(out_file,"0");
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (x < 0.0) {
|
||||
// fprintf(out_file, "-");
|
||||
// x = - x;
|
||||
x = pow(2.0, ((double) bits_to_left)) + x;
|
||||
}
|
||||
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
|
||||
diff = pow(2.0, -i);
|
||||
if (x < diff) {
|
||||
fprintf(out_file, "0");
|
||||
}
|
||||
else {
|
||||
fprintf(out_file, "1");
|
||||
x -= diff;
|
||||
}
|
||||
if (i == 0) {
|
||||
fprintf(out_file, ".");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
18
pipelined/srt/stine/disp.h
Executable file
18
pipelined/srt/stine/disp.h
Executable file
@ -0,0 +1,18 @@
|
||||
#ifndef DISP
#define DISP

#include <stdlib.h>
#include <math.h>
#include <stdio.h>

/* Round x toward zero at `bits` fractional bits. */
double rnd_zero(double x, double bits);

/* Round x to a multiple of 2^-precision with rint(). */
double rne(double x, double precision);

/* Round x down to a multiple of 2^-precision. */
double flr(double x, double precision);

/* Round x up to a multiple of 2^-precision. */
double ceiling(double x, double precision);

/* Print x in binary with bits_to_left integer digits and bits_to_right
   fractional digits to out_file. */
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);

#endif
|
||||
@ -195,7 +195,7 @@ module divide4 #(parameter WIDTH=64)
|
||||
logic [WIDTH:0] Qstar;
|
||||
logic [WIDTH:0] QMstar;
|
||||
logic [WIDTH:0] QM2star;
|
||||
logic [6:0] qtotal;
|
||||
logic [7:0] qtotal;
|
||||
logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2;
|
||||
logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1;
|
||||
logic [WIDTH+3:0] mdivi_temp, mdivi;
|
||||
@ -219,9 +219,9 @@ module divide4 #(parameter WIDTH=64)
|
||||
mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN);
|
||||
mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN);
|
||||
// Simplify QST
|
||||
adder #(7) cpa1 (SumN[WIDTH+3:WIDTH-3], CarryN[WIDTH+3:WIDTH-3], qtotal);
|
||||
adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal);
|
||||
// q = {+2, +1, -1, -2} else q = 0
|
||||
qst4 pd1 (qtotal[6:0], divi1[WIDTH-1:WIDTH-3], quotient);
|
||||
qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient);
|
||||
assign ulp = quotient[2]|quotient[3];
|
||||
assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]);
|
||||
// Map to binary encoding
|
||||
|
||||
BIN
pipelined/srt/stine/srt4div
Executable file
BIN
pipelined/srt/stine/srt4div
Executable file
Binary file not shown.
325
pipelined/srt/stine/srt4div.c
Executable file
325
pipelined/srt/stine/srt4div.c
Executable file
@ -0,0 +1,325 @@
|
||||
#include "disp.h"
|
||||
#include <math.h>
|
||||
|
||||
/*
 * Radix-4 quotient digit selection.
 *
 * prem is the partial-remainder estimate and d the divisor estimate; main()
 * passes both scaled by 16. d selects one of sixteen unit-wide bands
 * [k, k+1), k = 0..15, and prem is compared against that band's four
 * thresholds to choose a digit in {2, 1, 0, -1, -2}.
 *
 * Returns the selected digit. If d is outside [0, 16) (or is NaN) no band
 * applies: a warning is written to stderr and 0 is returned. Previously the
 * function fell off its end without returning a value in that case.
 */
int qslc (double prem, double d) {

  /* Row k holds the thresholds for k <= d < k+1, in the order
     {q=2, q=1, q=0, q=-1}: the first threshold that prem meets or exceeds
     picks the digit; below all four the digit is -2. */
  static const double thresholds[16][4] = {
    {  1.0,     0.25,    -0.25,    -1.0     },
    {  2.0,     0.66667, -0.6667,  -2.0     },
    {  4.0,     1.25,    -1.25,    -4.0     },
    {  5.0,     2.0,     -2.0,     -5.0     },
    {  6.66667, 2.0,     -2.0,     -6.66667 },
    {  8.0,     2.0,     -2.0,     -8.0     },
    { 10.0,     4.0,     -4.0,    -10.0     },
    { 11.0,     4.0,     -4.0,    -11.0     },
    { 12.0,     4.0,     -4.0,    -12.0     },
    { 15.0,     4.0,     -4.0,    -15.0     },
    { 15.0,     4.0,     -4.0,    -15.0     },
    { 16.0,     4.0,     -4.0,    -16.0     },
    { 20.0,     8.0,     -8.0,    -20.0     },
    { 20.0,     8.0,     -8.0,    -20.0     },
    { 20.0,     8.0,     -8.0,    -20.0     },
    { 24.0,     8.0,     -8.0,    -24.0     }
  };
  const double *row;
  int k;

  printf("d --> %lg\n", d);
  printf("rw --> %lg\n", prem);

  /* Written as a negated range test so that NaN is rejected as well. */
  if (!(d >= 0.0 && d < 16.0)) {
    fprintf(stderr,
	    "qslc: divisor estimate %lg is outside [0, 16); using q = 0\n", d);
    return 0;
  }

  row = thresholds[(int) d];   /* truncation == floor because d >= 0 */
  for (k = 0; k < 4; k++) {
    if (prem >= row[k])
      return 2 - k;
  }
  return -2;
}
|
||||
|
||||
/*
|
||||
This routine performs a radix-4 SRT division
|
||||
algorithm. The user inputs the numerator, the denominator,
|
||||
and the number of iterations. It assumes that 0.5 <= D < 1.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Command-line driver: srt4div numerator denominator num_iterations prec
 *
 * Rounds N and D to prec fractional bits with rne(), runs num_iterations
 * steps of the recurrence W <- 4*W - q*D with q chosen by qslc(), and
 * accumulates the quotient Q. A negative final remainder is corrected,
 * then the result is printed next to flr(N/D, prec).
 *
 * Exits with status 1 if fewer than four arguments are given or if any
 * argument cannot be parsed; returns 0 otherwise.
 */
int main(int argc, char* argv[]) {

  double P, N, D, Q, RQ, RD, scale;
  int q;
  int num_iter, i;
  int prec;

  if (argc < 5) {
    fprintf(stderr,
	    "Usage: %s numerator denominator num_iterations prec\n",
	    argv[0]);
    exit(1);
  }
  /* Reject unparsable arguments instead of computing with
     uninitialised values. */
  if (sscanf(argv[1],"%lg", &N) != 1 ||
      sscanf(argv[2],"%lg", &D) != 1 ||
      sscanf(argv[3],"%d", &num_iter) != 1 ||
      sscanf(argv[4],"%d", &prec) != 1) {
    fprintf(stderr, "%s: could not parse arguments\n", argv[0]);
    fprintf(stderr,
	    "Usage: %s numerator denominator num_iterations prec\n",
	    argv[0]);
    exit(1);
  }
  // Round to precision
  N = rne(N, prec);
  D = rne(D, prec);
  printf("N = ");
  disp_bin(N, 3, prec, stdout);
  printf("\n");
  printf("D = ");
  disp_bin(D, 3, prec, stdout);
  printf("\n");

  Q = 0;
  P = N*0.25;
  printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
	 N, D, N/D, num_iter);
  for (scale = 1, i = 0; i < num_iter; i++) {
    // Shift by r
    scale = scale*0.25;
    q = qslc(flr((4*P)*16,3), D*16);
    printf("4*W[n] = ");
    disp_bin(4*P,3,prec,stdout);
    printf("\n");
    printf("q*D = ");
    disp_bin(q*D,3,prec,stdout);
    printf("\n");
    // Recurrence (applied before the W[n+1] line so that the line shows
    // the new remainder rather than the old one)
    P = 4*P - q*D;
    printf("W[n+1] = ");
    disp_bin(P ,3,prec,stdout);
    printf("\n");
    // OTFC
    Q = Q + q*scale;
    printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
    printf("i = %d, q = %d", i, q);
    printf(", Q = ");
    disp_bin(Q, 3, prec, stdout);
    printf(", W = ");
    disp_bin(P, 3, prec, stdout);
    printf("\n\n");
  }
  // A negative final remainder means the quotient is one ulp too large
  if (P < 0) {
    Q = Q - scale;
    P = P + D;
    printf("\nCorrecting Negative Remainder\n");
    printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
    printf("Q = ");
    disp_bin(Q, 3, prec, stdout);
    printf(", W = ");
    disp_bin(P, 3, prec, stdout);
    printf("\n");
  }
  RQ = flr(N/D, (double) prec);
  // P started as N/4, so the accumulated quotient is scaled back by 4
  RD = Q*4;
  printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
  printf("true = ");
  disp_bin(RQ, 3, prec, stdout);
  printf(", computed = ");
  disp_bin(RD, 3, prec, stdout);
  printf("\n\n");
  printf("REM = %1.18lf \n", P);
  printf("REM = ");
  disp_bin(P, 3, prec, stdout);
  printf("\n\n");

  return 0;

}
|
||||
@ -1,289 +0,0 @@
|
||||
4000000000000000_4000000000000000_3ff0000000000000
|
||||
c018000000000000_4000000000000000_c008000000000000
|
||||
4024000000000000_4000000000000000_4014000000000000
|
||||
c032000000000000_4000000000000000_c022000000000000
|
||||
4041000000000000_4000000000000000_4031000000000000
|
||||
c05c000000000000_4000000000000000_c04c000000000000
|
||||
406e000000000000_4000000000000000_405e000000000000
|
||||
c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
|
||||
408199999999999a_4000000000000000_407199999999999a
|
||||
c093333333333333_4000000000000000_c083333333333333
|
||||
40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
|
||||
c0b004189374bc6a_4000000000000000_c0a004189374bc6a
|
||||
40c00068db8bac71_4000000000000000_40b00068db8bac71
|
||||
c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
|
||||
40e5555555555555_4000000000000000_40d5555555555555
|
||||
c0f999999999999a_4000000000000000_c0e999999999999a
|
||||
410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
|
||||
4000000000000000_c018000000000000_bfe5555555555555
|
||||
c018000000000000_c018000000000000_3ff0000000000000
|
||||
4024000000000000_c018000000000000_c00aaaaaaaaaaaab
|
||||
c032000000000000_c018000000000000_4018000000000000
|
||||
4041000000000000_c018000000000000_c026aaaaaaaaaaab
|
||||
c05c000000000000_c018000000000000_4032aaaaaaaaaaab
|
||||
406e000000000000_c018000000000000_c044000000000000
|
||||
c07ffff583a53b8e_c018000000000000_4055554e57c37d09
|
||||
408199999999999a_c018000000000000_c067777777777778
|
||||
c093333333333333_c018000000000000_4079999999999999
|
||||
40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
|
||||
c0b004189374bc6a_c018000000000000_40955acb6f46508d
|
||||
40c00068db8bac71_c018000000000000_c0a555e124ba3b41
|
||||
c0dd1745d1745d17_c018000000000000_40b364d9364d9365
|
||||
40e5555555555555_c018000000000000_c0cc71c71c71c71c
|
||||
c0f999999999999a_c018000000000000_40d1111111111111
|
||||
410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
|
||||
4000000000000000_4024000000000000_3fd999999999999a
|
||||
c018000000000000_4024000000000000_bfe3333333333333
|
||||
4024000000000000_4024000000000000_3ff0000000000000
|
||||
c032000000000000_4024000000000000_c00ccccccccccccd
|
||||
4041000000000000_4024000000000000_401b333333333333
|
||||
c05c000000000000_4024000000000000_c026666666666666
|
||||
406e000000000000_4024000000000000_4038000000000000
|
||||
c07ffff583a53b8e_4024000000000000_c0499991361dc93e
|
||||
408199999999999a_4024000000000000_405c28f5c28f5c2a
|
||||
c093333333333333_4024000000000000_c06eb851eb851eb8
|
||||
40a028f5c28f5c29_4024000000000000_4079db22d0e56042
|
||||
c0b004189374bc6a_4024000000000000_c089a027525460aa
|
||||
40c00068db8bac71_4024000000000000_40999a415f45e0b5
|
||||
c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
|
||||
40e5555555555555_4024000000000000_40b1111111111111
|
||||
c0f999999999999a_4024000000000000_c0c47ae147ae147b
|
||||
410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
|
||||
4000000000000000_c032000000000000_bfcc71c71c71c71c
|
||||
c018000000000000_c032000000000000_3fd5555555555555
|
||||
4024000000000000_c032000000000000_bfe1c71c71c71c72
|
||||
c032000000000000_c032000000000000_3ff0000000000000
|
||||
4041000000000000_c032000000000000_c00e38e38e38e38e
|
||||
c05c000000000000_c032000000000000_4018e38e38e38e39
|
||||
406e000000000000_c032000000000000_c02aaaaaaaaaaaab
|
||||
c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
|
||||
408199999999999a_c032000000000000_c04f49f49f49f4a0
|
||||
c093333333333333_c032000000000000_4051111111111111
|
||||
40a028f5c28f5c29_c032000000000000_c06cba9876543210
|
||||
c0b004189374bc6a_c032000000000000_407c790f3f086b67
|
||||
40c00068db8bac71_c032000000000000_c08c7281864da457
|
||||
c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
|
||||
40e5555555555555_c032000000000000_c0a2f684bda12f68
|
||||
c0f999999999999a_c032000000000000_40b6c16c16c16c17
|
||||
410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
|
||||
4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
|
||||
c018000000000000_4041000000000000_bfc6969696969697
|
||||
4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
|
||||
c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
|
||||
4041000000000000_4041000000000000_3ff0000000000000
|
||||
c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
|
||||
406e000000000000_4041000000000000_401c3c3c3c3c3c3c
|
||||
c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
|
||||
408199999999999a_4041000000000000_4030909090909091
|
||||
c093333333333333_4041000000000000_c042121212121212
|
||||
40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
|
||||
c0b004189374bc6a_4041000000000000_c06e25d3e863448b
|
||||
40c00068db8bac71_4041000000000000_407e1ee37f25085c
|
||||
c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
|
||||
40e5555555555555_4041000000000000_4094141414141414
|
||||
c0f999999999999a_4041000000000000_c0a8181818181818
|
||||
410c71c71c71c71c_4041000000000000_40bac5701ac5701a
|
||||
4000000000000000_c05c000000000000_bfa2492492492492
|
||||
c018000000000000_c05c000000000000_3fbb6db6db6db6db
|
||||
4024000000000000_c05c000000000000_bfc6db6db6db6db7
|
||||
c032000000000000_c05c000000000000_3fd4924924924925
|
||||
4041000000000000_c05c000000000000_bfe36db6db6db6db
|
||||
c05c000000000000_c05c000000000000_3ff0000000000000
|
||||
406e000000000000_c05c000000000000_c001249249249249
|
||||
c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
|
||||
408199999999999a_c05c000000000000_c0241d41d41d41d5
|
||||
c093333333333333_c05c000000000000_4035f15f15f15f16
|
||||
40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
|
||||
c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
|
||||
40c00068db8bac71_c05c000000000000_c062499c689fa081
|
||||
c0dd1745d1745d17_c05c000000000000_40709f959c427e56
|
||||
40e5555555555555_c05c000000000000_c088618618618618
|
||||
c0f999999999999a_c05c000000000000_409d41d41d41d41e
|
||||
410c71c71c71c71c_c05c000000000000_c0a0410410410410
|
||||
4000000000000000_406e000000000000_3f91111111111111
|
||||
c018000000000000_406e000000000000_bfa999999999999a
|
||||
4024000000000000_406e000000000000_3fb5555555555555
|
||||
c032000000000000_406e000000000000_bfc3333333333333
|
||||
4041000000000000_406e000000000000_3fd2222222222222
|
||||
c05c000000000000_406e000000000000_bfedddddddddddde
|
||||
406e000000000000_406e000000000000_3ff0000000000000
|
||||
c07ffff583a53b8e_406e000000000000_c001110b796930d4
|
||||
408199999999999a_406e000000000000_4012c5f92c5f92c6
|
||||
c093333333333333_406e000000000000_c0247ae147ae147b
|
||||
40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
|
||||
c0b004189374bc6a_406e000000000000_c041156f8c384071
|
||||
40c00068db8bac71_406e000000000000_40511180ea2e95ce
|
||||
c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
|
||||
40e5555555555555_406e000000000000_4076c16c16c16c16
|
||||
c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
|
||||
410c71c71c71c71c_406e000000000000_409e573ac901e573
|
||||
4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
|
||||
c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
|
||||
4024000000000000_c07ffff583a53b8e_bfa400068dbae089
|
||||
c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
|
||||
4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
|
||||
c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
|
||||
406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
|
||||
c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
|
||||
408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
|
||||
c093333333333333_c07ffff583a53b8e_401333397dd21f3c
|
||||
40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
|
||||
c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
|
||||
40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
|
||||
c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
|
||||
40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
|
||||
c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
|
||||
410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
|
||||
4000000000000000_408199999999999a_3f7d1745d1745d17
|
||||
c018000000000000_408199999999999a_bf85d1745d1745d1
|
||||
4024000000000000_408199999999999a_3f922e8ba2e8ba2e
|
||||
c032000000000000_408199999999999a_bfa05d1745d1745d
|
||||
4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
|
||||
c05c000000000000_408199999999999a_bfc9745d1745d174
|
||||
406e000000000000_408199999999999a_3fdb45d1745d1745
|
||||
c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
|
||||
408199999999999a_408199999999999a_3ff0000000000000
|
||||
c093333333333333_408199999999999a_c001745d1745d174
|
||||
40a028f5c28f5c29_408199999999999a_401d61bed61bed61
|
||||
c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
|
||||
40c00068db8bac71_408199999999999a_403d180477e6ade4
|
||||
c0dd1745d1745d17_408199999999999a_c04a723f789854a0
|
||||
40e5555555555555_408199999999999a_405364d9364d9364
|
||||
c0f999999999999a_408199999999999a_c06745d1745d1746
|
||||
410c71c71c71c71c_408199999999999a_4079dbcc48676f30
|
||||
4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
|
||||
c018000000000000_c093333333333333_3f74000000000000
|
||||
4024000000000000_c093333333333333_bf80aaaaaaaaaaab
|
||||
c032000000000000_c093333333333333_3f9e000000000000
|
||||
4041000000000000_c093333333333333_bfac555555555556
|
||||
c05c000000000000_c093333333333333_3fb7555555555556
|
||||
406e000000000000_c093333333333333_bfc9000000000000
|
||||
c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
|
||||
408199999999999a_c093333333333333_bfed555555555556
|
||||
c093333333333333_c093333333333333_3ff0000000000000
|
||||
40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
|
||||
c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
|
||||
40c00068db8bac71_c093333333333333_c02aab596de8ca12
|
||||
c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
|
||||
40e5555555555555_c093333333333333_c041c71c71c71c72
|
||||
c0f999999999999a_c093333333333333_4055555555555556
|
||||
410c71c71c71c71c_c093333333333333_c067b425ed097b42
|
||||
4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
|
||||
c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
|
||||
4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
|
||||
c032000000000000_40a028f5c28f5c29_bf81d260511be196
|
||||
4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
|
||||
c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
|
||||
406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
|
||||
c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
|
||||
408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
|
||||
c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
|
||||
40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
|
||||
c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
|
||||
40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
|
||||
c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
|
||||
40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
|
||||
c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
|
||||
410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
|
||||
4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
|
||||
c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
|
||||
4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
|
||||
c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
|
||||
4041000000000000_c0b004189374bc6a_bf80fba700417875
|
||||
c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
|
||||
406e000000000000_c0b004189374bc6a_bfadf853e2556b29
|
||||
c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
|
||||
408199999999999a_c0b004189374bc6a_bfc1951951951953
|
||||
c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
|
||||
40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
|
||||
c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
|
||||
40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
|
||||
c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
|
||||
40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
|
||||
c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
|
||||
410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
|
||||
4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
|
||||
c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
|
||||
4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
|
||||
c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
|
||||
4041000000000000_40c00068db8bac71_3f70ff909995ab11
|
||||
c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
|
||||
406e000000000000_40c00068db8bac71_3f9dff3b6962792e
|
||||
c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
|
||||
408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
|
||||
c093333333333333_40c00068db8bac71_bfc332b5622a8afe
|
||||
40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
|
||||
c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
|
||||
40c00068db8bac71_40c00068db8bac71_3ff0000000000000
|
||||
c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
|
||||
40e5555555555555_40c00068db8bac71_401554c989849a70
|
||||
c0f999999999999a_40c00068db8bac71_c02998f1d838b954
|
||||
410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
|
||||
4000000000000000_c0dd1745d1745d17_bf2199999999999a
|
||||
c018000000000000_c0dd1745d1745d17_3f3a666666666667
|
||||
4024000000000000_c0dd1745d1745d17_bf46000000000000
|
||||
c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
|
||||
4041000000000000_c0dd1745d1745d17_bf62b33333333333
|
||||
c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
|
||||
406e000000000000_c0dd1745d1745d17_bf80800000000000
|
||||
c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
|
||||
408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
|
||||
c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
|
||||
40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
|
||||
c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
|
||||
40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
|
||||
c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
|
||||
40e5555555555555_c0dd1745d1745d17_c007777777777777
|
||||
c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
|
||||
410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
|
||||
4000000000000000_40e5555555555555_3f18000000000000
|
||||
c018000000000000_40e5555555555555_bf22000000000000
|
||||
4024000000000000_40e5555555555555_3f3e000000000000
|
||||
c032000000000000_40e5555555555555_bf4b000000000000
|
||||
4041000000000000_40e5555555555555_3f59800000000000
|
||||
c05c000000000000_40e5555555555555_bf65000000000000
|
||||
406e000000000000_40e5555555555555_3f76800000000000
|
||||
c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
|
||||
408199999999999a_40e5555555555555_3f9a666666666667
|
||||
c093333333333333_40e5555555555555_bfaccccccccccccd
|
||||
40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
|
||||
c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
|
||||
40c00068db8bac71_40e5555555555555_3fd8009d495182aa
|
||||
c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
|
||||
40e5555555555555_40e5555555555555_3ff0000000000000
|
||||
c0f999999999999a_40e5555555555555_c003333333333334
|
||||
410c71c71c71c71c_40e5555555555555_4015555555555555
|
||||
4000000000000000_c0f999999999999a_bf04000000000000
|
||||
c018000000000000_c0f999999999999a_3f1e000000000000
|
||||
4024000000000000_c0f999999999999a_bf29000000000000
|
||||
c032000000000000_c0f999999999999a_3f36800000000000
|
||||
4041000000000000_c0f999999999999a_bf45400000000000
|
||||
c05c000000000000_c0f999999999999a_3f51800000000000
|
||||
406e000000000000_c0f999999999999a_bf62c00000000000
|
||||
c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
|
||||
408199999999999a_c0f999999999999a_bf86000000000000
|
||||
c093333333333333_c0f999999999999a_3f97ffffffffffff
|
||||
40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
|
||||
c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
|
||||
40c00068db8bac71_c0f999999999999a_bfc40083126e978d
|
||||
c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
|
||||
40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
|
||||
c0f999999999999a_c0f999999999999a_3ff0000000000000
|
||||
410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
|
||||
4000000000000000_410c71c71c71c71c_3ef2000000000000
|
||||
c018000000000000_410c71c71c71c71c_bf0b000000000000
|
||||
4024000000000000_410c71c71c71c71c_3f16800000000000
|
||||
c032000000000000_410c71c71c71c71c_bf24400000000000
|
||||
4041000000000000_410c71c71c71c71c_3f33200000000000
|
||||
c05c000000000000_410c71c71c71c71c_bf4f800000000000
|
||||
406e000000000000_410c71c71c71c71c_3f50e00000000000
|
||||
c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
|
||||
408199999999999a_410c71c71c71c71c_3f73ccccccccccce
|
||||
c093333333333333_410c71c71c71c71c_bf8599999999999a
|
||||
40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
|
||||
c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
|
||||
40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
|
||||
c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
|
||||
40e5555555555555_410c71c71c71c71c_3fd8000000000000
|
||||
c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
|
||||
410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000
|
||||
@ -1,145 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// testbench-imperas.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 9 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Wally Testbench and helper modules
|
||||
// Applies test programs from the Imperas suite
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
`include "wally-config.vh"
|
||||
module testbench();
|
||||
logic clk;
|
||||
logic reset, reset_ext;
|
||||
int test, i, errors, totalerrors;
|
||||
logic [31:0] sig32[10000:0];
|
||||
logic [`XLEN-1:0] signature[10000:0];
|
||||
logic [`XLEN-1:0] testadr;
|
||||
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
|
||||
logic [`XLEN-1:0] meminit;
|
||||
string tests[];
|
||||
logic [`AHBW-1:0] HRDATAEXT;
|
||||
logic HREADYEXT, HRESPEXT;
|
||||
logic [31:0] HADDR;
|
||||
logic [`AHBW-1:0] HWDATA;
|
||||
logic HWRITE;
|
||||
logic [2:0] HSIZE;
|
||||
logic [2:0] HBURST;
|
||||
logic [3:0] HPROT;
|
||||
logic [1:0] HTRANS;
|
||||
logic HMASTLOCK;
|
||||
logic HCLK, HRESETn;
|
||||
|
||||
// pick tests based on modes supported
|
||||
initial
|
||||
// tests = {"../../tests/imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremarkcodemod.bare.riscv.memfile", "1000"};
|
||||
tests = {"../../benchmarks/riscv-coremark/work/coremark.bare.riscv.memfile", "1000"};
|
||||
string signame, memfilename;
|
||||
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
|
||||
logic UARTSin, UARTSout;
|
||||
logic SDCCLK;
|
||||
logic SDCCmdIn;
|
||||
logic SDCCmdOut;
|
||||
logic SDCCmdOE;
|
||||
logic [3:0] SDCDatIn;
|
||||
|
||||
logic HREADY;
|
||||
logic HSELEXT;
|
||||
|
||||
assign SDCmd = 1'bz;
|
||||
assign SDCDat = 4'bz;
|
||||
|
||||
|
||||
// instantiate device to be tested
|
||||
assign GPIOPinsIn = 0;
|
||||
assign UARTSin = 1;
|
||||
assign HREADYEXT = 1;
|
||||
assign HRESPEXT = 0;
|
||||
assign HRDATAEXT = 0;
|
||||
wallypipelinedsoc dut(.clk, .reset_ext, .reset(), .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
|
||||
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
|
||||
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
|
||||
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
|
||||
|
||||
logic [31:0] InstrW;
|
||||
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
|
||||
|
||||
// Track names of instructions
|
||||
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
|
||||
dut.core.ifu.FinalInstrRawF,
|
||||
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
|
||||
dut.core.ifu.InstrM, InstrW,
|
||||
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
|
||||
/*
|
||||
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
|
||||
dut.core.ifu.icache.controller.FinalInstrRawF,
|
||||
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
|
||||
dut.core.ifu.InstrM, InstrW,
|
||||
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
|
||||
*/
|
||||
logic [`XLEN-1:0] PCW;
|
||||
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.core.ifu.PCM, PCW);
|
||||
|
||||
// initialize tests
|
||||
integer j;
|
||||
initial
|
||||
begin
|
||||
totalerrors = 0;
|
||||
// read test vectors into memory
|
||||
memfilename = tests[0];
|
||||
$readmemh(memfilename, dut.uncore.ram.ram.RAM);
|
||||
//for(j=268437955; j < 268566528; j = j+1)
|
||||
//dut.uncore.ram.RAM[j] = 64'b0;
|
||||
// ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.addr";
|
||||
// ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.lab";
|
||||
//dut.uncore.ram.RAM[268437713]=64'b1;
|
||||
reset_ext = 1; # 22; reset_ext = 0;
|
||||
end
|
||||
// generate clock to sequence tests
|
||||
always
|
||||
begin
|
||||
clk = 1; # 5; clk = 0; # 5;
|
||||
end
|
||||
always @(negedge clk)
|
||||
begin
|
||||
if (dut.core.priv.priv.ecallM) begin
|
||||
#20;
|
||||
$display("Code ended with ebreakM");
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
|
||||
initial begin
|
||||
// $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory);
|
||||
// $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.memory);
|
||||
$readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
|
||||
$readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
endmodule
|
||||
/* verilator lint_on STMTDLY */
|
||||
/* verilator lint_on WIDTH */
|
||||
@ -10,120 +10,64 @@ module testbenchfp;
|
||||
parameter TEST="none";
|
||||
|
||||
string Tests[]; // list of tests to be run
|
||||
string FmaRneTests[]; // list of FMA round to nearest even tests to run
|
||||
string FmaRuTests[]; // list of FMA round up tests to run
|
||||
string FmaRdTests[]; // list of FMA round down tests to run
|
||||
string FmaRzTests[]; // list of FMA round twords zero
|
||||
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
|
||||
logic [2:0] OpCtrl[]; // list of op controls
|
||||
logic [2:0] Unit[]; // list of units being tested
|
||||
logic WriteInt[]; // Is being written to integer resgiter
|
||||
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
|
||||
logic [1:0] Fmt[]; // list of formats for the other units
|
||||
logic [1:0] FmaFmt[]; // list of formats for the FMA
|
||||
|
||||
|
||||
logic clk=0;
|
||||
logic [31:0] TestNum=0; // index for the test
|
||||
logic [31:0] FmaTestNum=0; // index for the test
|
||||
logic [31:0] OpCtrlNum=0; // index for OpCtrl
|
||||
logic [31:0] errors=0; // how many errors
|
||||
logic [31:0] VectorNum=0; // index for test vector
|
||||
logic [31:0] FmaVectorNum=0; // index for test vector
|
||||
logic [31:0] FrmNum=0; // index for rounding mode
|
||||
logic [`FLEN*4+7:0] TestVectors[46464:0]; // list of test vectors
|
||||
logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
|
||||
logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; // list of fma ru test vectors
|
||||
logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; // list of fma rd test vectors
|
||||
logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; // list of fma rz test vectors
|
||||
logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
|
||||
logic [`FLEN*4+7:0] TestVectors[6133248:0]; // list of test vectors
|
||||
|
||||
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
|
||||
logic [1:0] FmtVal; // value of the current Fmt
|
||||
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
|
||||
logic WriteIntVal; // value of the current WriteInt
|
||||
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat
|
||||
logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat
|
||||
logic [`XLEN-1:0] SrcA; // integer input
|
||||
logic [`FLEN-1:0] Ans; // correct answer from TestFloat
|
||||
logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
|
||||
logic [`FLEN-1:0] Res; // result from other units
|
||||
logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
|
||||
logic [4:0] AnsFlg; // correct flags read from testfloat
|
||||
logic [4:0] FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
|
||||
logic [4:0] ResFlg; // Result flags
|
||||
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
|
||||
logic [`FMTBITS-1:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
|
||||
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
|
||||
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
|
||||
logic [4:0] ResFlg, Flg; // Result flags
|
||||
logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
|
||||
logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit
|
||||
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
|
||||
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
|
||||
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
|
||||
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
|
||||
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
|
||||
logic AnsNaN, ResNaN, NaNGood;
|
||||
logic XSgn, YSgn, ZSgn; // sign of the inputs
|
||||
logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
|
||||
logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
|
||||
logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
|
||||
logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
|
||||
logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
|
||||
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
|
||||
logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp;
|
||||
logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp;
|
||||
logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp;
|
||||
logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp;
|
||||
logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
|
||||
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
|
||||
logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan;
|
||||
logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan;
|
||||
logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan;
|
||||
logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan;
|
||||
logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
|
||||
logic XNaN, YNaN, ZNaN; // is the input NaN
|
||||
logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
|
||||
logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
|
||||
logic FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
|
||||
logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
|
||||
logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
|
||||
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
|
||||
logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
|
||||
logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
|
||||
logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
|
||||
logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
|
||||
logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
|
||||
logic XDenorm, ZDenorm; // is the input denormalized
|
||||
logic FmaRneXDenorm, FmaRneZDenorm;
|
||||
logic FmaRzXDenorm, FmaRzZDenorm;
|
||||
logic FmaRuXDenorm, FmaRuZDenorm;
|
||||
logic FmaRdXDenorm, FmaRdZDenorm;
|
||||
logic FmaRnmXDenorm, FmaRnmZDenorm;
|
||||
logic XInf, YInf, ZInf; // is the input infinity
|
||||
logic FmaRneXInf, FmaRneYInf, FmaRneZInf;
|
||||
logic FmaRzXInf, FmaRzYInf, FmaRzZInf;
|
||||
logic FmaRuXInf, FmaRuYInf, FmaRuZInf;
|
||||
logic FmaRdXInf, FmaRdYInf, FmaRdZInf;
|
||||
logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
|
||||
logic XZero, YZero, ZZero; // is the input zero
|
||||
logic FmaRneXZero, FmaRneYZero, FmaRneZZero;
|
||||
logic FmaRzXZero, FmaRzYZero, FmaRzZZero;
|
||||
logic FmaRuXZero, FmaRuYZero, FmaRuZZero;
|
||||
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
|
||||
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
|
||||
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
|
||||
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
|
||||
logic IntZeroE;
|
||||
logic CvtResSgnE;
|
||||
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
|
||||
logic [`NE:0] CvtCalcExpE; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic CvtResDenormUfE;
|
||||
|
||||
|
||||
// in-between FMA signals
|
||||
logic Mult;
|
||||
logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
|
||||
logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
|
||||
logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd;
|
||||
logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
|
||||
logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;
|
||||
logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
|
||||
logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
|
||||
logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
|
||||
logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
|
||||
logic [`NE+1:0] ProdExpE;
|
||||
logic AddendStickyE;
|
||||
logic KillProdE;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE;
|
||||
logic [3*`NF+5:0] SumE;
|
||||
logic InvZE;
|
||||
logic NegSumE;
|
||||
logic ZSgnEffE;
|
||||
logic PSgnE;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -282,15 +226,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
|
||||
// add the format for the Fma
|
||||
FmaFmt = {FmaFmt, 2'b11};
|
||||
Tests = {Tests, f128fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`D_SUPPORTED) begin // if double precision is supported
|
||||
@ -411,14 +353,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b01};
|
||||
Tests = {Tests, f64fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`F_SUPPORTED) begin // if single precision being supported
|
||||
@ -523,14 +464,13 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b00};
|
||||
Tests = {Tests, f32fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
end
|
||||
if (`ZFH_SUPPORTED) begin // if half precision supported
|
||||
@ -617,19 +557,18 @@ module testbenchfp;
|
||||
// end
|
||||
// end
|
||||
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
|
||||
// add each rounding mode to it's own list of tests
|
||||
// - fma tests are very long, so run all rounding modes in parallel
|
||||
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
|
||||
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
|
||||
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
|
||||
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
|
||||
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
|
||||
FmaFmt = {FmaFmt, 2'b10};
|
||||
Tests = {Tests, f16fma};
|
||||
OpCtrl = {OpCtrl, `FMA_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `FMAUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// check if nothing is being tested
|
||||
if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
|
||||
if (Tests.size() == 0) begin
|
||||
$display("TEST %s not supported in this configuration", TEST);
|
||||
$stop;
|
||||
end
|
||||
@ -648,26 +587,17 @@ module testbenchfp;
|
||||
// Read the first test
|
||||
initial begin
|
||||
$display("\n\nRunning %s vectors", Tests[TestNum]);
|
||||
$display("Running FMA precision %d", FmaTestNum);
|
||||
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
// set the test index to 0
|
||||
TestNum = 0;
|
||||
FmaTestNum = 0;
|
||||
end
|
||||
|
||||
// set a the signals for all tests
|
||||
always_comb FmaFmtVal = FmaFmt[FmaTestNum];
|
||||
always_comb UnitVal = Unit[TestNum];
|
||||
always_comb FmtVal = Fmt[TestNum];
|
||||
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
|
||||
always_comb WriteIntVal = WriteInt[OpCtrlNum];
|
||||
always_comb FrmVal = Frm[FrmNum];
|
||||
assign Mult = OpCtrlVal === 3'b100;
|
||||
|
||||
// modify the format signal if only 2 percisions supported
|
||||
// - 1 for the larger precision
|
||||
@ -675,61 +605,9 @@ module testbenchfp;
|
||||
always_comb begin
|
||||
if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
|
||||
else ModFmt = FmtVal;
|
||||
if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
|
||||
else FmaModFmt = FmaFmtVal;
|
||||
end
|
||||
|
||||
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
|
||||
readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg),
|
||||
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
|
||||
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
|
||||
.XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
.XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
|
||||
readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg),
|
||||
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
|
||||
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
|
||||
.XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
.XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
|
||||
readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg),
|
||||
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
|
||||
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
|
||||
.XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
.XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
|
||||
readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg),
|
||||
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
|
||||
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
|
||||
.XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
.XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
|
||||
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
|
||||
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
|
||||
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
|
||||
.XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm),
|
||||
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
|
||||
.XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
|
||||
.X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
|
||||
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
|
||||
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
|
||||
@ -754,124 +632,30 @@ module testbenchfp;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// instantiate devices under test
|
||||
// - one fma for each precison
|
||||
// - all the units for the other tests (including fma for add/sub/mul)
|
||||
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
|
||||
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
|
||||
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
|
||||
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
|
||||
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
|
||||
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
|
||||
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
|
||||
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
|
||||
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
|
||||
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
|
||||
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
|
||||
.XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf),
|
||||
.XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN),
|
||||
.KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp),
|
||||
.SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff),
|
||||
.PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE),
|
||||
.FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
|
||||
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
|
||||
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
|
||||
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
|
||||
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
|
||||
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
|
||||
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
|
||||
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
|
||||
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
|
||||
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
|
||||
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
|
||||
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
|
||||
.XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf),
|
||||
.XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN),
|
||||
.KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp),
|
||||
.SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff),
|
||||
.PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ),
|
||||
.FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
|
||||
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
|
||||
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
|
||||
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
|
||||
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
|
||||
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
|
||||
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
|
||||
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
|
||||
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
|
||||
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
|
||||
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
|
||||
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
|
||||
.XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf),
|
||||
.XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN),
|
||||
.KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp),
|
||||
.SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff),
|
||||
.PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU),
|
||||
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
|
||||
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
|
||||
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
|
||||
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
|
||||
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
|
||||
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
|
||||
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
|
||||
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
|
||||
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
|
||||
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
|
||||
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
|
||||
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
|
||||
.XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
|
||||
.XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
|
||||
.KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
|
||||
.SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
|
||||
.PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
|
||||
.FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
|
||||
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
|
||||
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
|
||||
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
|
||||
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
|
||||
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
|
||||
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
|
||||
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
|
||||
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
|
||||
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
|
||||
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
|
||||
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
|
||||
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
|
||||
.XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
|
||||
.XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
|
||||
.KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
|
||||
.SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff),
|
||||
.PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM),
|
||||
.FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));
|
||||
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
|
||||
fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
|
||||
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
|
||||
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf),
|
||||
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN),
|
||||
|
||||
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
|
||||
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
|
||||
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
|
||||
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
|
||||
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
|
||||
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
|
||||
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
|
||||
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
|
||||
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
|
||||
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
|
||||
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
|
||||
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
|
||||
// .CvtRes, .CvtFlgE);
|
||||
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Check whether the correct answer and the result are NaNs
|
||||
always_comb begin
|
||||
case (FmaFmtVal)
|
||||
4'b11: begin // quad
|
||||
FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
|
||||
end
|
||||
4'b01: begin // double
|
||||
FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
|
||||
end
|
||||
4'b00: begin // single
|
||||
FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
|
||||
end
|
||||
4'b10: begin // half
|
||||
FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
|
||||
FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
|
||||
FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
|
||||
FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
|
||||
FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
|
||||
FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
|
||||
FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
|
||||
FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
|
||||
FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
|
||||
FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
always_comb begin
|
||||
if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
|
||||
// an integer output can't be a NaN
|
||||
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
|
||||
always_comb begin
|
||||
// select the result to check
|
||||
case (UnitVal)
|
||||
`FMAUNIT: Res = FmaRes;
|
||||
`DIVUNIT: Res = DivRes;
|
||||
`FMAUNIT: Res = FpRes;
|
||||
`DIVUNIT: Res = FpRes;
|
||||
`CMPUNIT: Res = CmpRes;
|
||||
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
|
||||
`CVTFPUNIT: Res = CvtRes;
|
||||
`CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
|
||||
`CVTFPUNIT: Res = FpRes;
|
||||
endcase
|
||||
|
||||
// select the flag to check
|
||||
case (UnitVal)
|
||||
`FMAUNIT: ResFlg = FmaFlg;
|
||||
`DIVUNIT: ResFlg = DivFlg;
|
||||
`FMAUNIT: ResFlg = Flg;
|
||||
`DIVUNIT: ResFlg = Flg;
|
||||
`CMPUNIT: ResFlg = CmpFlg;
|
||||
`CVTINTUNIT: ResFlg = CvtFlg;
|
||||
`CVTFPUNIT: ResFlg = CvtFlg;
|
||||
`CVTINTUNIT: ResFlg = Flg;
|
||||
`CVTFPUNIT: ResFlg = Flg;
|
||||
endcase
|
||||
end
|
||||
// check results on falling edge of clk
|
||||
@ -1027,117 +757,6 @@ end
|
||||
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
|
||||
// - the sign of the NaN does not matter for the operations being tested
|
||||
// - when 2 or more NaNs are input, the NaN that is propagated doesn't matter
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
|
||||
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
|
||||
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
|
||||
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
|
||||
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
|
||||
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
|
||||
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
|
||||
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
|
||||
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
case (FmaFmtVal)
|
||||
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
|
||||
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
|
||||
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
|
||||
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
|
||||
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
|
||||
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
|
||||
(FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
|
||||
endcase
|
||||
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
|
||||
case (FmtVal)
|
||||
4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
|
||||
@ -1221,77 +840,8 @@ end
|
||||
$stop;
|
||||
end
|
||||
|
||||
// check if the fma tests are correct
|
||||
if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RNE");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RZ");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RU");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RD");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
|
||||
errors += 1;
|
||||
$display("There is an error in FMA - RNM");
|
||||
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
|
||||
$stop;
|
||||
end
|
||||
|
||||
VectorNum += 1; // increment the vector
|
||||
FmaVectorNum += 1; // increment the vector
|
||||
|
||||
// check to see if there are more vectors in this test
|
||||
// *** fix this so that the fma tests and the other tests run separately - re-add fma num
|
||||
if ((FmaRneVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRzVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRuVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRdVectors[FmaVectorNum][0] === 1'bx &
|
||||
FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
|
||||
|
||||
// increment the test
|
||||
FmaTestNum += 1;
|
||||
|
||||
// clear the vectors
|
||||
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
// read next files
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
|
||||
// set the vector index back to 0
|
||||
FmaVectorNum = 0;
|
||||
|
||||
// if no more Tests - finish
|
||||
if(Tests[TestNum] === "" &
|
||||
FmaRneTests[FmaTestNum] === "" &
|
||||
FmaRzTests[FmaTestNum] === "" &
|
||||
FmaRuTests[FmaTestNum] === "" &
|
||||
FmaRdTests[FmaTestNum] === "" &
|
||||
FmaRnmTests[FmaTestNum] === "") begin
|
||||
$display("\nAll Tests completed with %d errors\n", errors);
|
||||
$stop;
|
||||
end
|
||||
|
||||
$display("Running FMA precision %d", FmaTestNum);
|
||||
end
|
||||
|
||||
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
|
||||
|
||||
@ -1299,14 +849,9 @@ end
|
||||
TestNum += 1;
|
||||
|
||||
// clear the vectors
|
||||
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
|
||||
// read next files
|
||||
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
|
||||
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
|
||||
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
|
||||
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
|
||||
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
|
||||
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
|
||||
|
||||
// set the vector index back to 0
|
||||
VectorNum = 0;
|
||||
@ -1317,12 +862,7 @@ end
|
||||
else FrmNum = 0;
|
||||
|
||||
// if no more Tests - finish
|
||||
if(Tests[TestNum] === "" &
|
||||
FmaRneTests[FmaTestNum] === "" &
|
||||
FmaRzTests[FmaTestNum] === "" &
|
||||
FmaRuTests[FmaTestNum] === "" &
|
||||
FmaRdTests[FmaTestNum] === "" &
|
||||
FmaRnmTests[FmaTestNum] === "") begin
|
||||
if(Tests[TestNum] === "") begin
|
||||
$display("\nAll Tests completed with %d errors\n", errors);
|
||||
$stop;
|
||||
end
|
||||
@ -1335,89 +875,6 @@ endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// readfmavectors: splits one FMA test vector into its operands and expected results.
// On every rising clk edge TestVector is sliced, according to FmaFmt, into X, Y, Z, Ans and AnsFlg.
// X, Y and Z are then passed to the unpack instance, whose ports are wired to the remaining outputs.
module readfmavectors (
|
||||
input logic clk,
|
||||
input logic [`FMTBITS-1:0] FmaModFmt, // the modified format (passed to the unpacker as FmtE)
|
||||
input logic [1:0] FmaFmt, // the format of the FMA inputs (selects how TestVector is sliced)
|
||||
input logic [`FLEN*4+7:0] TestVector, // the test vector: four values plus an 8-bit flag field
|
||||
output logic [`FLEN-1:0] Ans, // the correct answer
|
||||
output logic [4:0] AnsFlg, // the correct flags
|
||||
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
|
||||
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
|
||||
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
|
||||
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
|
||||
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
|
||||
output logic XDenormE, ZDenormE, // is X/Z denormalized (there is no YDenormE output)
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic [`FLEN-1:0] X, Y, Z // the operands extracted from TestVector
|
||||
);
|
||||
|
||||
logic XExpMaxE; // output of the unpacker that is not used here
|
||||
// apply test vectors on rising edge of clk
|
||||
// Vector layout, MSB to LSB, for a format of length LEN:
|
||||
//   X [8+4*LEN-1:8+3*LEN], Y [8+3*LEN-1:8+2*LEN], Z [8+2*LEN-1:8+LEN], Ans [8+LEN-1:8], flags [4:0]
|
||||
always @(posedge clk) begin
|
||||
#1; // wait one time unit after the clock edge before updating the outputs
|
||||
AnsFlg = TestVector[4:0]; // the flags are the low 5 bits; the value fields start at bit 8
|
||||
case (FmaFmt)
|
||||
2'b11: begin // quad: slices are used as-is with no padding (presumably FLEN == Q_LEN here - TODO confirm)
|
||||
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
|
||||
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double: the upper FLEN-D_LEN bits of each value are filled with 1s
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
|
||||
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single: the upper FLEN-S_LEN bits of each value are filled with 1s
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
|
||||
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half: the upper FLEN-H_LEN bits of each value are filled with 1s
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// unpack X, Y and Z; the instance's remaining ports connect by name to this module's outputs and to XExpMaxE
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
|
||||
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
|
||||
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
|
||||
.XExpMaxE, .ZDenormE);
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module readvectors (
|
||||
input logic clk,
|
||||
input logic [`FLEN*4+7:0] TestVector,
|
||||
@ -1451,33 +908,61 @@ module readvectors (
|
||||
`FMAUNIT:
|
||||
case (Fmt)
|
||||
2'b11: begin // quad
|
||||
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
|
||||
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
|
||||
end
|
||||
else begin
|
||||
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
|
||||
end
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
|
||||
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
|
||||
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
if(OpCtrl === `FMA_OPCTRL) begin
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
|
||||
end
|
||||
else begin
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
|
||||
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
|
||||
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
end
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
@ -1532,19 +1017,19 @@ module readvectors (
|
||||
2'b11: begin // quad
|
||||
case (OpCtrl[1:0])
|
||||
2'b11: begin // quad
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
|
||||
X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
end
|
||||
endcase
|
||||
@ -1628,12 +1113,12 @@ module readvectors (
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
end
|
||||
2'b01: begin // quad -> long
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {TestVector[8+(`XLEN-1):8]};
|
||||
end
|
||||
2'b00: begin // quad -> int
|
||||
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
|
||||
X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
|
||||
SrcA = {`XLEN{1'bx}};
|
||||
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
|
||||
end
|
||||
|
||||
@ -396,6 +396,7 @@ module riscvassertions;
|
||||
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
|
||||
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
|
||||
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
|
||||
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
|
||||
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
|
||||
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
|
||||
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
|
||||
@ -418,6 +419,7 @@ module riscvassertions;
|
||||
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache requires DBUS.");
|
||||
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache requires IBUS.");
|
||||
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
|
||||
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
|
||||
end
|
||||
endmodule
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
`define ADD_OPCTRL 3'b110
|
||||
`define MUL_OPCTRL 3'b100
|
||||
`define SUB_OPCTRL 3'b111
|
||||
`define FADD_OPCTRL 3'b000
|
||||
`define FMA_OPCTRL 3'b000
|
||||
`define DIV_OPCTRL 3'b000
|
||||
`define SQRT_OPCTRL 3'b001
|
||||
`define LE_OPCTRL 3'b011
|
||||
@ -21,11 +21,11 @@
|
||||
`define RU 3'b011
|
||||
`define RD 3'b010
|
||||
`define RNM 3'b100
|
||||
`define FMAUNIT 0
|
||||
`define FMAUNIT 2
|
||||
`define DIVUNIT 1
|
||||
`define CVTINTUNIT 2
|
||||
`define CVTFPUNIT 3
|
||||
`define CMPUNIT 4
|
||||
`define CVTINTUNIT 0
|
||||
`define CVTFPUNIT 4
|
||||
`define CMPUNIT 3
|
||||
|
||||
string f16rv32cvtint[] = '{
|
||||
"ui32_to_f16_rne.tv",
|
||||
|
||||
@ -40,9 +40,6 @@ string tvpaths[] = '{
|
||||
"../../addins/embench-iot/bd_speed/src/"
|
||||
};
|
||||
|
||||
|
||||
|
||||
// *** make sure these are somewhere
|
||||
string coremark[] = '{
|
||||
`COREMARK,
|
||||
"coremark.bare.riscv"
|
||||
@ -1105,11 +1102,11 @@ string imperas32f[] = '{
|
||||
// "rv64i_m/D/d_fdiv_b20-01", // looks like flags
|
||||
// "rv64i_m/D/d_fdiv_b2-01", // also flags
|
||||
// "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
|
||||
"rv64i_m/D/d_fdiv_b3-01",
|
||||
// "rv64i_m/D/d_fdiv_b3-01",
|
||||
// "rv64i_m/D/d_fdiv_b4-01", // flags
|
||||
"rv64i_m/D/d_fdiv_b5-01",
|
||||
// "rv64i_m/D/d_fdiv_b5-01",
|
||||
// "rv64i_m/D/d_fdiv_b6-01", // flags
|
||||
"rv64i_m/D/d_fdiv_b7-01",
|
||||
// "rv64i_m/D/d_fdiv_b7-01",
|
||||
// "rv64i_m/D/d_fdiv_b8-01", // flags
|
||||
// "rv64i_m/D/d_fdiv_b9-01", might be a flag too
|
||||
"rv64i_m/D/d_feq_b1-01",
|
||||
|
||||
@ -332,7 +332,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fma1 ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma1/*} -nworst 1 }
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
|
||||
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
|
||||
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
|
||||
|
||||
@ -2,482 +2,482 @@
|
||||
BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
|
||||
OUTPUT="./vectors"
|
||||
echo "Creating ui32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
|
||||
echo "Creating ui32_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
|
||||
echo "Creating ui32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
|
||||
echo "Creating ui32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
|
||||
echo "Creating ui64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
|
||||
echo "Creating ui64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
|
||||
echo "Creating ui64_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
|
||||
echo "Creating ui64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
|
||||
echo "Creating i32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
|
||||
echo "Creating i32_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
|
||||
echo "Creating i32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
|
||||
echo "Creating i32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
|
||||
echo "Creating i64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
|
||||
echo "Creating i64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
|
||||
echo "Creating i64_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
|
||||
echo "Creating i64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
|
||||
echo "Creating f16_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
|
||||
echo "Creating f32_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
|
||||
echo "Creating f64_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
|
||||
echo "Creating f128_to_ui32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
|
||||
echo "Creating f16_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
|
||||
echo "Creating f32_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
|
||||
echo "Creating f64_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
|
||||
echo "Creating f128_to_ui64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
|
||||
echo "Creating f16_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
|
||||
echo "Creating f32_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
|
||||
echo "Creating f64_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
|
||||
echo "Creating f128_to_i32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
|
||||
echo "Creating f16_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
|
||||
echo "Creating f32_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
|
||||
echo "Creating f64_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
|
||||
echo "Creating f128_to_i64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
|
||||
echo "Creating f16_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
|
||||
echo "Creating f16_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
|
||||
echo "Creating f16_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
|
||||
echo "Creating f32_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
|
||||
echo "Creating f32_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
|
||||
echo "Creating f32_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
|
||||
echo "Creating f64_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
|
||||
echo "Creating f64_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
|
||||
echo "Creating f64_to_f128 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
|
||||
echo "Creating f128_to_f16 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
|
||||
echo "Creating f128_to_f32 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
|
||||
echo "Creating f128_to_f64 convert vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
|
||||
echo "Creating f16_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
|
||||
echo "Creating f32_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
|
||||
echo "Creating f64_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
|
||||
echo "Creating f128_add vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
|
||||
echo "Creating f16_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
|
||||
echo "Creating f32_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
|
||||
echo "Creating f64_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
|
||||
echo "Creating f128_sub vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
|
||||
echo "Creating f16_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
|
||||
echo "Creating f32_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
|
||||
echo "Creating f64_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
|
||||
echo "Creating f128_mul vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
|
||||
echo "Creating f16_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
|
||||
echo "Creating f32_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
|
||||
echo "Creating f64_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
|
||||
echo "Creating f128_div vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
|
||||
echo "Creating f16_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
|
||||
echo "Creating f32_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
|
||||
echo "Creating f64_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
|
||||
echo "Creating f128_sqrt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
|
||||
echo "Creating f16_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
|
||||
# Generate f32_eq test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f32_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
|
||||
# Generate f64_eq test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f64_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
|
||||
# Generate f128_eq test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f128_eq vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
|
||||
# Generate f16_le test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f16_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
|
||||
# Generate f32_le test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f32_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
|
||||
# Generate f64_le test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f64_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
|
||||
# Generate f128_le test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f128_le vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
|
||||
# Generate f16_lt test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f16_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
|
||||
# Generate f32_lt test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f32_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
|
||||
# Generate f64_lt test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f64_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
|
||||
# Generate f128_lt test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f128_lt vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
|
||||
# Generate f16_mulAdd test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f16_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
|
||||
# Generate f32_mulAdd test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f32_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
|
||||
# Generate f64_mulAdd test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f64_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
|
||||
# Generate f128_mulAdd test vectors: testfloat_gen stdout is redirected to one .tv file
# per rounding flag (-rnear_even -> _rne, -rminMag -> _rz, -rmax -> _ru, -rmin -> _rd,
# -rnear_maxMag -> _rnm).
# NOTE(review): each output path appears twice below - once from a plain invocation and
# once from a `-tininessafter -level 1` invocation. If run top to bottom the second write
# replaces the first. This looks like a rendered diff (old lines followed by their
# replacements, +/- markers lost) rather than a runnable script - confirm.
echo "Creating f128_mulAdd vectors"
|
||||
$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
|
||||
# Same five rounding modes with -tininessafter -level 1, targeting the same files as above.
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
|
||||
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
|
||||
|
||||
Loading…
Reference in New Issue
Block a user