This commit is contained in:
Madeleine Masser-Frye 2022-06-15 18:30:27 +00:00
commit c2493168b6
81 changed files with 99537 additions and 4059 deletions

4
.gitignore vendored
View File

@ -10,6 +10,10 @@ __pycache__/
addins
addins/riscv-arch-test/Makefile.include
addins/riscv-tests/target
addins/coremark/work/*
addins/embench/bd_speed/*
addins/embench/bd_size/*
benchmarks/embench/wally*.json
#vsim work files to ignore
transcript

3
.gitmodules vendored
View File

@ -17,6 +17,9 @@
[submodule "addins/embench-iot"]
path = addins/embench-iot
url = https://github.com/embench/embench-iot
[submodule "addins/coremark"]
path = addins/coremark
url = https://github.com/eembc/coremark
[submodule "addins/sky130_osu_sc_t18"]
path = addins/sky130_osu_sc_t18
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t18

1
addins/coremark Submodule

@ -0,0 +1 @@
Subproject commit f3e8f2e0941e42961aadcc52750b1b5577c157c9

@ -1 +1 @@
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230

View File

@ -0,0 +1,29 @@
#cmbase=../../addins/coremark
PORT_DIR = $(CURDIR)/riscv64-baremetal
cmbase=../../addins/coremark
work_dir=$(cmbase)/work
sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \
$(cmbase)/core_matrix.c $(cmbase)/core_state.c $(cmbase)/core_util.c \
$(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \
$(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c
$(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
riscv64-unknown-elf-objdump -D $< > $<.elf.objdump
riscv64-unknown-elf-elf2hex --bit-width 64 --input $< --output $@
extractFunctionRadix.sh $<.elf.objdump
(cd ../../pipelined/regression && (vsim -c -do "do wally-pipelined-batch.do rv64gc coremark" > $(work_dir)/coremark.sim.log))
cd ../../benchmarks/coremark/
$(work_dir)/coremark.bare.riscv: $(sources) Makefile
# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
# These flags were used by WD on CoreMark
make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta "
# -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error
mkdir -p $(work_dir)
mv $(cmbase)/coremark.bare.riscv $(work_dir)
.PHONY: clean
clean:
rm -f $(work_dir)/*

View File

@ -0,0 +1,385 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
#include <stdio.h>
#include <stdlib.h>
#include "coremark.h"
#if CALLGRIND_RUN
#include <valgrind/callgrind.h>
#endif
#if (MEM_METHOD==MEM_MALLOC)
#include <malloc.h>
/* Function: portable_malloc
Provide malloc() functionality in a platform specific way.
*/
void *portable_malloc(size_t size) {
return malloc(size);
}
/* Function: portable_free
Provide free() functionality in a platform specific way.
*/
void portable_free(void *p) {
free(p);
}
#else
void *portable_malloc(size_t size) {
return NULL;
}
void portable_free(void *p) {
p=NULL;
}
#endif
#if (SEED_METHOD==SEED_VOLATILE)
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile=0x3415;
volatile ee_s32 seed2_volatile=0x3415;
volatile ee_s32 seed3_volatile=0x66;
#endif
#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile=0x0;
volatile ee_s32 seed2_volatile=0x0;
volatile ee_s32 seed3_volatile=0x66;
#endif
#if PROFILE_RUN
volatile ee_s32 seed1_volatile=0x8;
volatile ee_s32 seed2_volatile=0x8;
volatile ee_s32 seed3_volatile=0x8;
#endif
volatile ee_s32 seed4_volatile=ITERATIONS;
volatile ee_s32 seed5_volatile=0;
#endif
/* Porting: Timing functions
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
Sample implementation for standard time.h and windows.h definitions included.
*/
/* Define: TIMER_RES_DIVIDER
Divider to trade off timer resolution and total time that can be measured.
Use lower values to increase resolution, but make sure that overflow does not occur.
If there are issues with the return value overflowing, increase this value.
*/
#if USE_CLOCK
#define NSECS_PER_SEC CLOCKS_PER_SEC
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE clock_t
#define GETMYTIME(_t) (*_t=clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif defined(_MSC_VER)
#define NSECS_PER_SEC 10000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE FILETIME
#define GETMYTIME(_t) GetSystemTimeAsFileTime(_t)
#define MYTIMEDIFF(fin,ini) (((*(__int64*)&fin)-(*(__int64*)&ini))/TIMER_RES_DIVIDER)
/* setting to millisces resolution by default with MSDEV */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#elif HAS_TIME_H
#define NSECS_PER_SEC 1000000000
#define EE_TIMER_TICKER_RATE 1000
#define CORETIMETYPE struct timespec
#define GETMYTIME(_t) clock_gettime(CLOCK_REALTIME,_t)
#define MYTIMEDIFF(fin,ini) ((fin.tv_sec-ini.tv_sec)*(NSECS_PER_SEC/TIMER_RES_DIVIDER)+(fin.tv_nsec-ini.tv_nsec)/TIMER_RES_DIVIDER)
/* setting to 1/1000 of a second resolution by default with linux */
#ifndef TIMER_RES_DIVIDER
#define TIMER_RES_DIVIDER 1000000
#endif
#define SAMPLE_TIME_IMPLEMENTATION 1
#else
// Defined for RISCV
#define NSECS_PER_SEC 1000000000 // TODO: What freq are we assuming?
#define EE_TIMER_TICKER_RATE 1000 // TODO: What is this?
#define CORETIMETYPE clock_t
#define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
#define SAMPLE_TIME_IMPLEMENTATION 1
#endif
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
#if SAMPLE_TIME_IMPLEMENTATION
/** Define Host specific (POSIX), or target specific global time variables. */
static CORETIMETYPE start_time_val, stop_time_val;
static unsigned long start_instr_val, stop_instr_val;
/* Function: minstretFunc
This function will count the number of instructions.
*/
unsigned long minstretFunc(void)
{
unsigned long minstretRead = read_csr(minstret);
//ee_printf("Minstret is %lu\n", minstretRead);
return minstretRead;
}
/* Function: minstretDiff
This function will take the difference between the first and second reads from the
MINSTRET csr to determine the number of machine instructions retired between two points
of time
*/
unsigned long minstretDiff(void)
{
unsigned long minstretDifference = MYTIMEDIFF(stop_instr_val, start_instr_val);
return minstretDifference;
}
/* Function: start_time
This function will be called right before starting the timed portion of the benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
*/
void start_time(void) {
start_instr_val = minstretFunc();
GETMYTIME(start_time_val);
//ee_printf("Timer started\n");
//ee_printf(" MTIME: %u\n", start_time_val);
#if CALLGRIND_RUN
CALLGRIND_START_INSTRUMENTATION
#endif
#if MICA
asm volatile("int3");/*1 */
#endif
}
/* Function: stop_time
This function will be called right after ending the timed portion of the benchmark.
Implementation may be capturing a system timer (as implemented in the example code)
or other system parameters - e.g. reading the current value of cpu cycles counter.
*/
void stop_time(void) {
#if CALLGRIND_RUN
CALLGRIND_STOP_INSTRUMENTATION
#endif
#if MICA
asm volatile("int3");/*1 */
#endif
GETMYTIME(stop_time_val);
stop_instr_val = minstretFunc();
//ee_printf("Timer stopped\n");
//ee_printf(" MTIME: %u\n", stop_time_val);
}
/* Function: get_time
Return an abstract "ticks" number that signifies time on the system.
Actual value returned may be cpu cycles, milliseconds or any other value,
as long as it can be converted to seconds by <time_in_secs>.
This methodology is taken to accomodate any hardware or simulated platform.
The sample implementation returns millisecs by default,
and the resolution is controlled by <TIMER_RES_DIVIDER>
*/
CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
unsigned long instructions = minstretDiff();
ee_printf(" Called get_time\n");
ee_printf(" Elapsed MTIME: %u\n", elapsed);
ee_printf(" Elapsed MINSTRET: %lu\n", instructions);
ee_printf(" CPI: %lu / %lu\n", elapsed, instructions);
return elapsed;
}
/* Function: time_in_secs
Convert the value returned by get_time to seconds.
The <secs_ret> type is used to accomodate systems with no support for floating point.
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
*/
secs_ret time_in_secs(CORE_TICKS ticks) {
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
float retvalint = (float) retval;
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retvalint);
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retval);
ee_printf("RETURN VALUE FROM TIME IN SECS FUNCTION: %f\n", retval);
return retvalint;
}
#else
#error "Please implement timing functionality in core_portme.c"
#endif /* SAMPLE_TIME_IMPLEMENTATION */
ee_u32 default_num_contexts = MULTITHREAD;
/* Function: portable_init
Target specific initialization code
Test for some common mistakes.
*/
void portable_init(core_portable *p, int *argc, char *argv[])
{
#if PRINT_ARGS
int i;
for (i=0; i<*argc; i++) {
ee_printf("Arg[%d]=%s\n",i,argv[i]);
}
#endif
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
}
if (sizeof(ee_u32) != 4) {
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
}
#if (MAIN_HAS_NOARGC && (SEED_METHOD==SEED_ARG))
ee_printf("ERROR! Main has no argc, but SEED_METHOD defined to SEED_ARG!\n");
#endif
#if (MULTITHREAD>1) && (SEED_METHOD==SEED_ARG)
int nargs=*argc,i;
if ((nargs>1) && (*argv[1]=='M')) {
default_num_contexts=parseval(argv[1]+1);
if (default_num_contexts>MULTITHREAD)
default_num_contexts=MULTITHREAD;
/* Shift args since first arg is directed to the portable part and not to coremark main */
--nargs;
for (i=1; i<nargs; i++)
argv[i]=argv[i+1];
*argc=nargs;
}
#endif /* sample of potential platform specific init via command line, reset the number of contexts being used if first argument is M<n>*/
p->portable_id=1;
}
/* Function: portable_fini
Target specific final code
*/
void portable_fini(core_portable *p)
{
p->portable_id=0;
}
#if (MULTITHREAD>1)
/* Function: core_start_parallel
Start benchmarking in a parallel context.
Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
Other implementations using MCAPI or other standards can easily be devised.
*/
/* Function: core_stop_parallel
Stop a parallel context execution of coremark, and gather the results.
Three implementations are provided, one using pthreads, one using fork and shared mem, and one using fork and sockets.
Other implementations using MCAPI or other standards can easily be devised.
*/
#if USE_PTHREAD
ee_u8 core_start_parallel(core_results *res) {
return (ee_u8)pthread_create(&(res->port.thread),NULL,iterate,(void *)res);
}
ee_u8 core_stop_parallel(core_results *res) {
void *retval;
return (ee_u8)pthread_join(res->port.thread,&retval);
}
#elif USE_FORK
static int key_id=0;
ee_u8 core_start_parallel(core_results *res) {
key_t key=4321+key_id;
key_id++;
res->port.pid=fork();
res->port.shmid=shmget(key, 8, IPC_CREAT | 0666);
if (res->port.shmid<0) {
ee_printf("ERROR in shmget!\n");
}
if (res->port.pid==0) {
iterate(res);
res->port.shm=shmat(res->port.shmid, NULL, 0);
/* copy the validation values to the shared memory area and quit*/
if (res->port.shm == (char *) -1) {
ee_printf("ERROR in child shmat!\n");
} else {
memcpy(res->port.shm,&(res->crc),8);
shmdt(res->port.shm);
}
exit(0);
}
return 1;
}
ee_u8 core_stop_parallel(core_results *res) {
int status;
pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED);
if (wpid != res->port.pid) {
ee_printf("ERROR waiting for child.\n");
if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
if (errno == EINTR) ee_printf("errno=Interrupted\n");
return 0;
}
/* after process is done, get the values from the shared memory area */
res->port.shm=shmat(res->port.shmid, NULL, 0);
if (res->port.shm == (char *) -1) {
ee_printf("ERROR in parent shmat!\n");
return 0;
}
memcpy(&(res->crc),res->port.shm,8);
shmdt(res->port.shm);
return 1;
}
#elif USE_SOCKET
static int key_id=0;
ee_u8 core_start_parallel(core_results *res) {
int bound, buffer_length=8;
res->port.sa.sin_family = AF_INET;
res->port.sa.sin_addr.s_addr = htonl(0x7F000001);
res->port.sa.sin_port = htons(7654+key_id);
key_id++;
res->port.pid=fork();
if (res->port.pid==0) { /* benchmark child */
iterate(res);
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (-1 == res->port.sock) /* if socket failed to initialize, exit */ {
ee_printf("Error Creating Socket");
} else {
int bytes_sent = sendto(res->port.sock, &(res->crc), buffer_length, 0,(struct sockaddr*)&(res->port.sa), sizeof (struct sockaddr_in));
if (bytes_sent < 0)
ee_printf("Error sending packet: %s\n", strerror(errno));
close(res->port.sock); /* close the socket */
}
exit(0);
}
/* parent process, open the socket */
res->port.sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
bound = bind(res->port.sock,(struct sockaddr*)&(res->port.sa), sizeof(struct sockaddr));
if (bound < 0)
ee_printf("bind(): %s\n",strerror(errno));
return 1;
}
ee_u8 core_stop_parallel(core_results *res) {
int status;
int fromlen=sizeof(struct sockaddr);
int recsize = recvfrom(res->port.sock, &(res->crc), 8, 0, (struct sockaddr*)&(res->port.sa), &fromlen);
if (recsize < 0) {
ee_printf("Error in receive: %s\n", strerror(errno));
return 0;
}
pid_t wpid = waitpid(res->port.pid,&status,WUNTRACED);
if (wpid != res->port.pid) {
ee_printf("ERROR waiting for child.\n");
if (errno == ECHILD) ee_printf("errno=No such child %d\n",res->port.pid);
if (errno == EINTR) ee_printf("errno=Interrupted\n");
return 0;
}
return 1;
}
#else /* no standard multicore implementation */
#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
#endif /* multithread implementations */
#endif

View File

@ -0,0 +1,296 @@
/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original Author: Shay Gal-on
*/
/* Topic: Description
This file contains configuration constants required to execute on different platforms
*/
#ifndef CORE_PORTME_H
#define CORE_PORTME_H
/************************/
/* Data types and settings */
/************************/
/* Configuration: HAS_FLOAT
Define to 1 if the platform supports floating point.
*/
#ifndef HAS_FLOAT
#define HAS_FLOAT 1
#endif
/* Configuration: HAS_TIME_H
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef HAS_TIME_H
#define HAS_TIME_H 0
#endif
/* Configuration: USE_CLOCK
Define to 1 if platform has the time.h header file,
and implementation of functions thereof.
*/
#ifndef USE_CLOCK
#define USE_CLOCK 0
#endif
/* Configuration: HAS_STDIO
Define to 1 if the platform has stdio.h.
*/
#ifndef HAS_STDIO
#define HAS_STDIO 1
#endif
/* Configuration: HAS_PRINTF
Define to 1 if the platform has stdio.h and implements the printf function.
*/
#ifndef HAS_PRINTF
#define HAS_PRINTF 1
#endif
/* Configuration: CORE_TICKS
Define type of return from the timing functions.
*/
#if defined(_MSC_VER)
#include <windows.h>
typedef size_t CORE_TICKS;
#elif HAS_TIME_H
#include <time.h>
typedef clock_t CORE_TICKS;
#else
/* Configuration: size_t and clock_t
Note these need to match the size of the clock output and the xLen the processor supports
*/
typedef unsigned long int size_t;
typedef unsigned long int clock_t;
typedef clock_t CORE_TICKS;
#endif
/* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
Initialize these strings per platform
*/
#ifndef COMPILER_VERSION
#ifdef __GNUC__
#define COMPILER_VERSION "GCC"__VERSION__
#else
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
#endif
#endif
#ifndef COMPILER_FLAGS
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
#endif
#ifndef MEM_LOCATION
#define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)"
#define MEM_LOCATION_UNSPEC 1
#endif
/* Data Types:
To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in <core_portme.h>.
*Imprtant*:
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
*/
typedef signed short ee_s16;
typedef unsigned short ee_u16;
typedef signed int ee_s32;
typedef double ee_f32;
typedef unsigned char ee_u8;
typedef unsigned int ee_u32;
typedef unsigned long long ee_ptr_int;
typedef size_t ee_size_t;
/* align an offset to point to a 32b value */
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3))
/* Configuration: SEED_METHOD
Defines method to get seed values that cannot be computed at compile time.
Valid values:
SEED_ARG - from command line.
SEED_FUNC - from a system function.
SEED_VOLATILE - from volatile variables.
*/
#ifndef SEED_METHOD
#define SEED_METHOD SEED_VOLATILE
#endif
/* Configuration: MEM_METHOD
Defines method to get a block of memry.
Valid values:
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
MEM_STATIC - to use a static memory array.
MEM_STACK - to allocate the data block on the stack (NYI).
*/
#ifndef MEM_METHOD
#define MEM_METHOD MEM_STATIC
#endif
/* Configuration: MULTITHREAD
Define for parallel execution
Valid values:
1 - only one context (default).
N>1 - will execute N copies in parallel.
Note:
If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined.
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
to fit a particular architecture.
*/
#ifndef MULTITHREAD
#define MULTITHREAD 1
#endif
/* Configuration: USE_PTHREAD
Sample implementation for launching parallel contexts
This implementation uses pthread_thread_create and pthread_join.
Valid values:
0 - Do not use pthreads API.
1 - Use pthreads API
Note:
This flag only matters if MULTITHREAD has been defined to a value greater then 1.
*/
#ifndef USE_PTHREAD
#define USE_PTHREAD 0
#endif
/* Configuration: USE_FORK
Sample implementation for launching parallel contexts
This implementation uses fork, waitpid, shmget,shmat and shmdt.
Valid values:
0 - Do not use fork API.
1 - Use fork API
Note:
This flag only matters if MULTITHREAD has been defined to a value greater then 1.
*/
#ifndef USE_FORK
#define USE_FORK 0
#endif
/* Configuration: USE_SOCKET
Sample implementation for launching parallel contexts
This implementation uses fork, socket, sendto and recvfrom
Valid values:
0 - Do not use fork and sockets API.
1 - Use fork and sockets API
Note:
This flag only matters if MULTITHREAD has been defined to a value greater then 1.
*/
#ifndef USE_SOCKET
#define USE_SOCKET 0
#endif
/* Configuration: MAIN_HAS_NOARGC
Needed if platform does not support getting arguments to main.
Valid values:
0 - argc/argv to main is supported
1 - argc/argv to main is not supported
*/
#ifndef MAIN_HAS_NOARGC
#define MAIN_HAS_NOARGC 1
#endif
/* Configuration: MAIN_HAS_NORETURN
Needed if platform does not support returning a value from main.
Valid values:
0 - main returns an int, and return value will be 0.
1 - platform does not support returning a value from main
*/
#ifndef MAIN_HAS_NORETURN
#define MAIN_HAS_NORETURN 0
#endif
/* Variable: default_num_contexts
Number of contexts to spawn in multicore context.
Override this global value to change number of contexts used.
Note:
This value may not be set higher then the <MULTITHREAD> define.
To experiment, you can set the <MULTITHREAD> define to the highest value expected, and use argc/argv in the <portable_init> to set this value from the command line.
*/
extern ee_u32 default_num_contexts;
#if (MULTITHREAD>1)
#if USE_PTHREAD
#include <pthread.h>
#define PARALLEL_METHOD "PThreads"
#elif USE_FORK
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/shm.h>
#include <string.h> /* for memcpy */
#define PARALLEL_METHOD "Fork"
#elif USE_SOCKET
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/wait.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#define PARALLEL_METHOD "Sockets"
#else
#define PARALLEL_METHOD "Proprietary"
#error "Please implement multicore functionality in core_portme.c to use multiple contexts."
#endif /* Method for multithreading */
#endif /* MULTITHREAD > 1 */
typedef struct CORE_PORTABLE_S {
#if (MULTITHREAD>1)
#if USE_PTHREAD
pthread_t thread;
#elif USE_FORK
pid_t pid;
int shmid;
void *shm;
#elif USE_SOCKET
pid_t pid;
int sock;
struct sockaddr_in sa;
#endif /* Method for multithreading */
#endif /* MULTITHREAD>1 */
ee_u8 portable_id;
} core_portable;
/* target specific init/fini */
void portable_init(core_portable *p, int *argc, char *argv[]);
void portable_fini(core_portable *p);
#if (SEED_METHOD==SEED_VOLATILE)
#if (VALIDATION_RUN || PERFORMANCE_RUN || PROFILE_RUN)
#define RUN_TYPE_FLAG 1
#else
#if (TOTAL_DATA_SIZE==1200)
#define PROFILE_RUN 1
#else
#define PERFORMANCE_RUN 1
#endif
#endif
#endif /* SEED_METHOD==SEED_VOLATILE */
#endif /* CORE_PORTME_H */

View File

@ -0,0 +1,149 @@
# Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original Author: Shay Gal-on
#File: core_portme.mak
# Flag: RISCVTOOLS
# Use this flag to point to your RISCV tools
RISCVTOOLS=$(RISCV)
# Flag: RISCVTYPE
# Type of toolchain to use
RISCVTYPE=riscv64-unknown-elf
# Flag: OUTFLAG
# Use this flag to define how to to get an executable (e.g -o)
OUTFLAG= -o
# Flag: CC
# Use this flag to define compiler to use
# david_harris@hmc.edu 20 Nov 2021 removed full path; require
CC = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
#CC = $(RISCVTYPE)-gcc
# Flag: CFLAGS
# Use this flag to define compiler options. Note, you can add compiler options from the command line using XCFLAGS="other flags"
#PORT_CFLAGS = -O2 -static -std=gnu99
PORT_CFLAGS = -O2 -mcmodel=medany -static -fno-tree-loop-distribute-patterns -std=gnu99 -fno-common -nostartfiles -lm -lgcc -T $(PORT_DIR)/link.ld
FLAGS_STR = "$(PORT_CFLAGS) $(XCFLAGS) $(XLFLAGS) $(LFLAGS_END)"
CFLAGS = $(PORT_CFLAGS) -I$(PORT_DIR) -I. -DFLAGS_STR=\"$(FLAGS_STR)\"
#Flag: LFLAGS_END
# Define any libraries needed for linking or other flags that should come at the end of the link line (e.g. linker scripts).
# Note: On certain platforms, the default clock_gettime implementation is supported but requires linking of librt.
LFLAGS_END +=
# Flag: PORT_SRCS
# Port specific source files can be added here
PORT_SRCS = $(PORT_DIR)/core_portme.c $(PORT_DIR)/syscalls.c $(PORT_DIR)/crt.S
# Flag: LOAD
# Define this flag if you need to load to a target, as in a cross compile environment.
# Flag: RUN
# Define this flag if running does not consist of simple invocation of the binary.
# In a cross compile environment, you need to define this.
#For flashing and using a tera term macro, you could use
#LOAD = flash ADDR
#RUN = ttpmacro coremark.ttl
#For copying to target and executing via SSH connection, you could use
#LOAD = scp $(OUTFILE) user@target:~
#RUN = ssh user@target -c
#For native compilation and execution
LOAD = echo Loading done
RUN = spike pk
OEXT = .o
EXE = .bare.riscv
# Flag: SEPARATE_COMPILE
# Define if you need to separate compilation from link stage.
# In this case, you also need to define below how to create an object file, and how to link.
ifdef SEPARATE_COMPILE
LD = $(RISCVTOOLS)/bin/$(RISCVTYPE)-gcc
OBJOUT = -o
LFLAGS =
OFLAG = -o
COUT = -c
# Flag: PORT_OBJS
# Port specific object files can be added here
PORT_OBJS = $(PORT_DIR)/core_portme$(OEXT)
PORT_CLEAN = *$(OEXT)
$(OPATH)%$(OEXT) : %.c
$(CC) $(CFLAGS) $(XCFLAGS) $(COUT) $< $(OBJOUT) $@
endif
# Target: port_prebuild
# Generate any files that are needed before actual build starts.
# E.g. generate profile guidance files. Sample PGO generation for gcc enabled with PGO=1
# - First, check if PGO was defined on the command line, if so, need to add -fprofile-use to compile line.
# - Second, if PGO reference has not yet been generated, add a step to the prebuild that will build a profile-generate version and run it.
# Note - Using REBUILD=1
#
# Use make PGO=1 to invoke this sample processing.
ifdef PGO
ifeq (,$(findstring $(PGO),gen))
PGO_STAGE=build_pgo_gcc
CFLAGS+=-fprofile-use
endif
PORT_CLEAN+=*.gcda *.gcno gmon.out
endif
.PHONY: port_prebuild
port_prebuild: $(PGO_STAGE)
.PHONY: build_pgo_gcc
build_pgo_gcc:
$(MAKE) PGO=gen XCFLAGS="$(XCFLAGS) -fprofile-generate -DTOTAL_DATA_SIZE=1200" ITERATIONS=10 gen_pgo_data REBUILD=1
# Target: port_postbuild
# Generate any files that are needed after actual build end.
# E.g. change format to srec, bin, zip in order to be able to load into flash
.PHONY: port_postbuild
port_postbuild:
# Target: port_postrun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_postrun
port_postrun:
# Target: port_prerun
# Do platform specific after run stuff.
# E.g. reset the board, backup the logfiles etc.
.PHONY: port_prerun
port_prerun:
# Target: port_postload
# Do platform specific after load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_postload
port_postload:
# Target: port_preload
# Do platform specific before load stuff.
# E.g. reset the reset power to the flash eraser
.PHONY: port_preload
port_preload:
# FLAG: OPATH
# Path to the output folder. Default - current folder.
OPATH = ./
MKDIR = mkdir -p
# FLAG: PERL
# Define perl executable to calculate the geomean if running separate.
PERL=/usr/bin/perl

View File

@ -0,0 +1,237 @@
# See LICENSE for license details.
#include "encoding.h"
#if __riscv_xlen == 64
# define LREG ld
# define SREG sd
# define REGBYTES 8
#else
# define LREG lw
# define SREG sw
# define REGBYTES 4
#endif
.section ".text.init"
.globl _start
_start:
li x1, 0
li x2, 0
li x3, 0
li x4, 0
li x5, 0
li x6, 0
li x7, 0
li x8, 0
li x9, 0
li x10,0
li x11,0
li x12,0
li x13,0
li x14,0
li x15,0
li x16,0
li x17,0
li x18,0
li x19,0
li x20,0
li x21,0
li x22,0
li x23,0
li x24,0
li x25,0
li x26,0
li x27,0
li x28,0
li x29,0
li x30,0
li x31,0
# enable FPU and accelerator if present
li t0, MSTATUS_FS | MSTATUS_XS
csrs mstatus, t0
# make sure XLEN agrees with compilation choice
li t0, 1
slli t0, t0, 31
#if __riscv_xlen == 64
bgez t0, 1f
#else
bltz t0, 1f
#endif
2:
li a0, 1
sw a0, tohost, t0
j 2b
1:
#ifdef __riscv_flen
# initialize FPU if we have one
la t0, 1f
csrw mtvec, t0
fssr x0
fmv.s.x f0, x0
fmv.s.x f1, x0
fmv.s.x f2, x0
fmv.s.x f3, x0
fmv.s.x f4, x0
fmv.s.x f5, x0
fmv.s.x f6, x0
fmv.s.x f7, x0
fmv.s.x f8, x0
fmv.s.x f9, x0
fmv.s.x f10,x0
fmv.s.x f11,x0
fmv.s.x f12,x0
fmv.s.x f13,x0
fmv.s.x f14,x0
fmv.s.x f15,x0
fmv.s.x f16,x0
fmv.s.x f17,x0
fmv.s.x f18,x0
fmv.s.x f19,x0
fmv.s.x f20,x0
fmv.s.x f21,x0
fmv.s.x f22,x0
fmv.s.x f23,x0
fmv.s.x f24,x0
fmv.s.x f25,x0
fmv.s.x f26,x0
fmv.s.x f27,x0
fmv.s.x f28,x0
fmv.s.x f29,x0
fmv.s.x f30,x0
fmv.s.x f31,x0
1:
#endif
# initialize trap vector
la t0, trap_entry
csrw mtvec, t0
# initialize global pointer
.option push
.option norelax
la gp, __global_pointer$
.option pop
la tp, _end + 63
and tp, tp, -64
# get core id
csrr a0, mhartid
# for now, assume only 1 core
li a1, 1
1:bgeu a0, a1, 1b
# give each core 128KB of stack + TLS
#define STKSHIFT 17
sll a2, a0, STKSHIFT
add tp, tp, a2
add sp, a0, 1
sll sp, sp, STKSHIFT
add sp, sp, tp
j _init
.align 2
trap_entry:
addi sp, sp, -272
SREG x1, 1*REGBYTES(sp)
SREG x2, 2*REGBYTES(sp)
SREG x3, 3*REGBYTES(sp)
SREG x4, 4*REGBYTES(sp)
SREG x5, 5*REGBYTES(sp)
SREG x6, 6*REGBYTES(sp)
SREG x7, 7*REGBYTES(sp)
SREG x8, 8*REGBYTES(sp)
SREG x9, 9*REGBYTES(sp)
SREG x10, 10*REGBYTES(sp)
SREG x11, 11*REGBYTES(sp)
SREG x12, 12*REGBYTES(sp)
SREG x13, 13*REGBYTES(sp)
SREG x14, 14*REGBYTES(sp)
SREG x15, 15*REGBYTES(sp)
SREG x16, 16*REGBYTES(sp)
SREG x17, 17*REGBYTES(sp)
SREG x18, 18*REGBYTES(sp)
SREG x19, 19*REGBYTES(sp)
SREG x20, 20*REGBYTES(sp)
SREG x21, 21*REGBYTES(sp)
SREG x22, 22*REGBYTES(sp)
SREG x23, 23*REGBYTES(sp)
SREG x24, 24*REGBYTES(sp)
SREG x25, 25*REGBYTES(sp)
SREG x26, 26*REGBYTES(sp)
SREG x27, 27*REGBYTES(sp)
SREG x28, 28*REGBYTES(sp)
SREG x29, 29*REGBYTES(sp)
SREG x30, 30*REGBYTES(sp)
SREG x31, 31*REGBYTES(sp)
csrr a0, mcause
csrr a1, mepc
mv a2, sp
jal handle_trap
csrw mepc, a0
# Remain in M-mode after eret
li t0, MSTATUS_MPP
csrs mstatus, t0
LREG x1, 1*REGBYTES(sp)
LREG x2, 2*REGBYTES(sp)
LREG x3, 3*REGBYTES(sp)
LREG x4, 4*REGBYTES(sp)
LREG x5, 5*REGBYTES(sp)
LREG x6, 6*REGBYTES(sp)
LREG x7, 7*REGBYTES(sp)
LREG x8, 8*REGBYTES(sp)
LREG x9, 9*REGBYTES(sp)
LREG x10, 10*REGBYTES(sp)
LREG x11, 11*REGBYTES(sp)
LREG x12, 12*REGBYTES(sp)
LREG x13, 13*REGBYTES(sp)
LREG x14, 14*REGBYTES(sp)
LREG x15, 15*REGBYTES(sp)
LREG x16, 16*REGBYTES(sp)
LREG x17, 17*REGBYTES(sp)
LREG x18, 18*REGBYTES(sp)
LREG x19, 19*REGBYTES(sp)
LREG x20, 20*REGBYTES(sp)
LREG x21, 21*REGBYTES(sp)
LREG x22, 22*REGBYTES(sp)
LREG x23, 23*REGBYTES(sp)
LREG x24, 24*REGBYTES(sp)
LREG x25, 25*REGBYTES(sp)
LREG x26, 26*REGBYTES(sp)
LREG x27, 27*REGBYTES(sp)
LREG x28, 28*REGBYTES(sp)
LREG x29, 29*REGBYTES(sp)
LREG x30, 30*REGBYTES(sp)
LREG x31, 31*REGBYTES(sp)
addi sp, sp, 272
mret
.section ".tdata.begin"
.globl _tdata_begin
_tdata_begin:
.section ".tdata.end"
.globl _tdata_end
_tdata_end:
.section ".tbss.end"
.globl _tbss_end
_tbss_end:
.section ".tohost","aw",@progbits
.align 6
.globl tohost
tohost: .dword 0
.align 6
.globl fromhost
fromhost: .dword 0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,66 @@
/*======================================================================*/
/* Proxy kernel linker script */
/*======================================================================*/
/* This is the linker script used when building the proxy kernel. */
/*----------------------------------------------------------------------*/
/* Setup */
/*----------------------------------------------------------------------*/
/* The OUTPUT_ARCH command specifies the machine architecture where the
argument is one of the names used in the BFD library. More
specifically one of the entires in bfd/cpu-mips.c */
OUTPUT_ARCH( "riscv" )
ENTRY(_start)
/*----------------------------------------------------------------------*/
/* Sections */
/*----------------------------------------------------------------------*/
SECTIONS
{
/* text: test code section */
. = 0x80000000;
.text.init : { *(.text.init) }
. = ALIGN(0x1000);
.tohost : { *(.tohost) }
.text : { *(.text) }
/* data segment */
.data : { *(.data) }
.sdata : {
__global_pointer$ = . + 0x800;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
/* bss segment */
.sbss : {
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss : { *(.bss) }
/* thread-local data segment */
.tdata :
{
_tls_data = .;
*(.tdata.begin)
*(.tdata)
*(.tdata.end)
}
.tbss :
{
*(.tbss)
*(.tbss.end)
}
/* End of uninitalized data segement */
_end = .;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,540 @@
// See LICENSE for license details.
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <stdio.h>
#include <limits.h>
#include <sys/signal.h>
#include "util.h"
#include "coremark.h"
#include <stdlib.h>
#define SYS_write 64
#undef strcmp
extern volatile uint64_t tohost;
extern volatile uint64_t fromhost;
void _send_char(char c) {
/*#error "You must implement the method _send_char to use this file!\n";
*/
volatile unsigned char *THR=(unsigned char *)0x10000000;
volatile unsigned char *LSR=(unsigned char *)0x10000005;
while(!(*LSR&0b100000));
*THR=c;
while(!(*LSR&0b100000));
}
int sendstring(const char *p){
int n=0;
while (*p) {
_send_char(*p);
n++;
p++;
}
return n;
}
static uintptr_t syscall(uintptr_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2)
{
volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
magic_mem[0] = which;
magic_mem[1] = arg0;
magic_mem[2] = arg1;
magic_mem[3] = arg2;
__sync_synchronize();
tohost = (uintptr_t)magic_mem;
while (fromhost == 0)
;
fromhost = 0;
__sync_synchronize();
return magic_mem[0];
}
#define NUM_COUNTERS 3
static uintptr_t counters[NUM_COUNTERS];
static char* counter_names[NUM_COUNTERS];
void setStats(int enable)
{
int i = 0;
#define READ_CTR(name) do { \
while (i >= NUM_COUNTERS) ; \
uintptr_t csr = read_csr(name); \
if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
counters[i++] = csr; \
} while (0)
READ_CTR(mcycle);
READ_CTR(minstret);
READ_CTR(mhpmcounter3);
READ_CTR(mhpmcounter4);
READ_CTR(mhpmcounter5);
READ_CTR(mhpmcounter6);
READ_CTR(mhpmcounter7);
READ_CTR(mhpmcounter8);
READ_CTR(mhpmcounter9);
READ_CTR(mhpmcounter10);
READ_CTR(mhpmcounter11);
READ_CTR(mhpmcounter12);
#undef READ_CTR
}
void __attribute__((noreturn)) tohost_exit(uintptr_t code)
{
tohost = (code << 1) | 1;
asm ("ecall");
exit(0);
}
uintptr_t __attribute__((weak)) handle_trap(uintptr_t cause, uintptr_t epc, uintptr_t regs[32])
{
tohost_exit(1337);
}
void exit(int code)
{
tohost_exit(code);
}
void abort()
{
exit(128 + SIGABRT);
}
void printstr(const char* s)
{
syscall(SYS_write, 1, (uintptr_t)s, strlen(s));
}
void __attribute__((weak)) thread_entry(int cid, int nc)
{
// multi-threaded programs override this function.
// for the case of single-threaded programs, only let core 0 proceed.
while (cid != 0);
}
int __attribute__((weak)) main(int argc, char** argv)
{
// single-threaded programs override this function.
printstr("Implement main(), foo!\n");
return -1;
}
static void init_tls()
{
register void* thread_pointer asm("tp");
extern char _tls_data;
extern __thread char _tdata_begin, _tdata_end, _tbss_end;
size_t tdata_size = &_tdata_end - &_tdata_begin;
memcpy(thread_pointer, &_tls_data, tdata_size);
size_t tbss_size = &_tbss_end - &_tdata_end;
memset(thread_pointer + tdata_size, 0, tbss_size);
}
void _init(int cid, int nc)
{
init_tls();
thread_entry(cid, nc);
// only single-threaded programs should ever get here.
int ret = main(0, 0);
char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
char* pbuf = buf;
for (int i = 0; i < NUM_COUNTERS; i++)
if (counters[i])
pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]);
if (pbuf != buf)
printstr(buf);
counters[3] = read_csr(mhpmcounter3) - counters[3];
counters[4] = read_csr(mhpmcounter4) - counters[4];
counters[5] = read_csr(mhpmcounter5) - counters[5];
counters[6] = read_csr(mhpmcounter6) - counters[6];
counters[7] = read_csr(mhpmcounter7) - counters[7];
counters[8] = read_csr(mhpmcounter8) - counters[8];
counters[9] = read_csr(mhpmcounter9) - counters[9];
counters[10] = read_csr(mhpmcounter10) - counters[10];
counters[11] = read_csr(mhpmcounter11) - counters[11];
counters[12] = read_csr(mhpmcounter12) - counters[12];
ee_printf("Load Stalls %d\n", counters[3]);
ee_printf("D-Cache Accesses %d\n", counters[11]);
ee_printf("D-Cache Misses %d\n", counters[12]);
ee_printf("Branches %d\n", counters[5]);
ee_printf("Branches Miss Predictions %d\n", counters[4]);
ee_printf("BTB Misses %d\n", counters[6]);
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
ee_printf("RAS Wrong %d\n", counters[8]);
ee_printf("Returns %d\n", counters[9]);
ee_printf("BP Class Wrong %d\n", counters[10]);
ee_printf("Done printing performance counters\n");
exit(ret);
}
#undef putchar
int putchar(int ch)
{
/*static __thread char buf[64] __attribute__((aligned(64)));
static __thread int buflen = 0;
buf[buflen++] = ch;
if (ch == '\n' || buflen == sizeof(buf))
{
syscall(SYS_write, 1, (uintptr_t)buf, buflen);
buflen = 0;
}
return 0;*/
_send_char(ch);
return 0;
}
void printhex(uint64_t x)
{
char str[17];
int i;
for (i = 0; i < 16; i++)
{
str[15-i] = (x & 0xF) + ((x & 0xF) < 10 ? '0' : 'a'-10);
x >>= 4;
}
str[16] = 0;
printstr(str);
}
static inline void printnum(void (*putch)(int, void**), void **putdat,
unsigned long long num, unsigned base, int width, int padc)
{
unsigned digs[sizeof(num)*CHAR_BIT];
int pos = 0;
while (1)
{
digs[pos++] = num % base;
if (num < base)
break;
num /= base;
}
while (width-- > pos)
putch(padc, putdat);
while (pos-- > 0)
putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat);
}
static unsigned long long getuint(va_list *ap, int lflag)
{
if (lflag >= 2)
return va_arg(*ap, unsigned long long);
else if (lflag)
return va_arg(*ap, unsigned long);
else
return va_arg(*ap, unsigned int);
}
static long long getint(va_list *ap, int lflag)
{
if (lflag >= 2)
return va_arg(*ap, long long);
else if (lflag)
return va_arg(*ap, long);
else
return va_arg(*ap, int);
}
static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
register const char* p;
const char* last_fmt;
register int ch, err;
unsigned long long num;
int base, lflag, width, precision, altflag;
char padc;
while (1) {
while ((ch = *(unsigned char *) fmt) != '%') {
if (ch == '\0')
return;
fmt++;
putch(ch, putdat);
}
fmt++;
// Process a %-escape sequence
last_fmt = fmt;
padc = ' ';
width = -1;
precision = -1;
lflag = 0;
altflag = 0;
reswitch:
switch (ch = *(unsigned char *) fmt++) {
// flag to pad on the right
case '-':
padc = '-';
goto reswitch;
// flag to pad with 0's instead of spaces
case '0':
padc = '0';
goto reswitch;
// width field
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
for (precision = 0; ; ++fmt) {
precision = precision * 10 + ch - '0';
ch = *fmt;
if (ch < '0' || ch > '9')
break;
}
goto process_precision;
case '*':
precision = va_arg(ap, int);
goto process_precision;
case '.':
if (width < 0)
width = 0;
goto reswitch;
case '#':
altflag = 1;
goto reswitch;
process_precision:
if (width < 0)
width = precision, precision = -1;
goto reswitch;
// long flag (doubled for long long)
case 'l':
lflag++;
goto reswitch;
// character
case 'c':
putch(va_arg(ap, int), putdat);
break;
// string
case 's':
if ((p = va_arg(ap, char *)) == NULL)
p = "(null)";
if (width > 0 && padc != '-')
for (width -= strnlen(p, precision); width > 0; width--)
putch(padc, putdat);
for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
putch(ch, putdat);
p++;
}
for (; width > 0; width--)
putch(' ', putdat);
break;
// (signed) decimal
case 'd':
num = getint(&ap, lflag);
if ((long long) num < 0) {
putch('-', putdat);
num = -(long long) num;
}
base = 10;
goto signed_number;
// unsigned decimal
case 'u':
base = 10;
goto unsigned_number;
// (unsigned) octal
case 'o':
// should do something with padding so it's always 3 octits
base = 8;
goto unsigned_number;
// pointer
case 'p':
static_assert(sizeof(long) == sizeof(void*));
lflag = 1;
putch('0', putdat);
putch('x', putdat);
/* fall through to 'x' */
// (unsigned) hexadecimal
case 'X':
case 'x':
base = 16;
unsigned_number:
num = getuint(&ap, lflag);
signed_number:
printnum(putch, putdat, num, base, width, padc);
break;
// escaped '%' character
case '%':
putch(ch, putdat);
break;
// unrecognized escape sequence - just print it literally
default:
putch('%', putdat);
fmt = last_fmt;
break;
}
}
}
int printf(const char* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
vprintfmt((void*)putchar, 0, fmt, ap);
va_end(ap);
return 0; // incorrect return value, but who cares, anyway?
}
int puts(const char* s)
{
printf(s);
printf("\n");
return 0; // incorrect return value, but who cares, anyway?
}
int sprintf(char* str, const char* fmt, ...)
{
va_list ap;
char* str0 = str;
va_start(ap, fmt);
void sprintf_putch(int ch, void** data)
{
char** pstr = (char**)data;
**pstr = ch;
(*pstr)++;
}
vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
*str = 0;
va_end(ap);
return str - str0;
}
void* memcpy(void* dest, const void* src, size_t len)
{
if ((((uintptr_t)dest | (uintptr_t)src | len) & (sizeof(uintptr_t)-1)) == 0) {
const uintptr_t* s = src;
uintptr_t *d = dest;
while (d < (uintptr_t*)(dest + len))
*d++ = *s++;
} else {
const char* s = src;
char *d = dest;
while (d < (char*)(dest + len))
*d++ = *s++;
}
return dest;
}
void* memset(void* dest, int byte, size_t len)
{
if ((((uintptr_t)dest | len) & (sizeof(uintptr_t)-1)) == 0) {
uintptr_t word = byte & 0xFF;
word |= word << 8;
word |= word << 16;
word |= word << 16 << 16;
uintptr_t *d = dest;
while (d < (uintptr_t*)(dest + len)){
*d = word;
d++;}
} else {
char *d = dest;
while (d < (char*)(dest + len)){
*d = byte;
d++;}
}
return dest;
}
size_t strlen(const char *s)
{
const char *p = s;
while (*p)
p++;
return p - s;
}
size_t strnlen(const char *s, size_t n)
{
const char *p = s;
while (n-- && *p)
p++;
return p - s;
}
int strcmp(const char* s1, const char* s2)
{
unsigned char c1, c2;
do {
c1 = *s1++;
c2 = *s2++;
} while (c1 != 0 && c1 == c2);
return c1 - c2;
}
char* strcpy(char* dest, const char* src)
{
char* d = dest;
while ((*d++ = *src++))
;
return dest;
}
long atol(const char* str)
{
long res = 0;
int sign = 0;
while (*str == ' ')
str++;
if (*str == '-' || *str == '+') {
sign = *str == '-';
str++;
}
while (*str) {
res *= 10;
res += *str++ - '0';
}
return sign ? -res : res;
}

View File

@ -0,0 +1,90 @@
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H
extern void setStats(int enable);
#include <stdint.h>
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
static int verify(int n, const volatile int* test, const int* verify)
{
int i;
// Unrolled for faster verification
for (i = 0; i < n/2*2; i+=2)
{
int t0 = test[i], t1 = test[i+1];
int v0 = verify[i], v1 = verify[i+1];
if (t0 != v0) return i+1;
if (t1 != v1) return i+2;
}
if (n % 2 != 0 && test[n-1] != verify[n-1])
return n;
return 0;
}
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
int i;
// Unrolled for faster verification
for (i = 0; i < n/2*2; i+=2)
{
double t0 = test[i], t1 = test[i+1];
double v0 = verify[i], v1 = verify[i+1];
int eq1 = t0 == v0, eq2 = t1 == v1;
if (!(eq1 & eq2)) return i+1+eq1;
}
if (n % 2 != 0 && test[n-1] != verify[n-1])
return n;
return 0;
}
static void __attribute__((noinline)) barrier(int ncores)
{
static volatile int sense;
static volatile int count;
static __thread int threadsense;
__sync_synchronize();
threadsense = !threadsense;
if (__sync_fetch_and_add(&count, 1) == ncores-1)
{
count = 0;
sense = threadsense;
}
else while(sense != threadsense)
;
__sync_synchronize();
}
static uint64_t lfsr(uint64_t x)
{
uint64_t bit = (x ^ (x >> 1)) & 1;
return (x >> 1) | (bit << 62);
}
static uintptr_t insn_len(uintptr_t pc)
{
return (*(unsigned short*)pc & 3) ? 4 : 2;
}
#ifdef __riscv
#include "encoding.h"
#endif
#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
#define stats(code, iter) do { \
unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
code; \
_c += read_csr(mcycle), _i += read_csr(minstret); \
if (cid == 0) \
printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
} while(0)
#endif //__UTIL_H

View File

@ -2,6 +2,8 @@
# Expanded and developed by Daniel Torres dtorres@hmc.edu
# Compile Embench for Wally
embench_dir = ../../addins/embench-iot
all: sim size
allClean: clean all
@ -10,12 +12,12 @@ build: buildspeed buildsize
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
buildspeed:
../../addins/embench-iot/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find ../../addins/embench-iot/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
$(embench_dir)/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find $(embench_dir)/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
buildsize:
../../addins/embench-iot/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
$(embench_dir)/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
sim: modelsim_build_memfile modelsim_run speed
@ -27,36 +29,36 @@ modelsim_run:
# builds the objdump based on the compiled c elf files
objdump: buildspeed
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
# build memfiles, objdump.lab and objdump.addr files
modelsim_build_memfile: objdump
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(embench_dir)/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
# builds the tests for speed, runs them on spike and then launches python script to present results
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
# you'll need to manually remove one of the two .output files, or run make clean
spike: buildspeed spikecmd speed
spike: buildspeed objdump spike_run speed
# command to run spike on all of the benchmarks
spike_run: buildspeed
find ../../addins/embench-iot/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
spike_run:
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
# python wrapper to present results of embench size benchmark
size: buildsize
../../addins/embench-iot/benchmark_size.py --builddir=bd_size
$(embench_dir)/benchmark_size.py --builddir=bd_size --json-output > wallySize.json
# python wrapper to present results of embench speed benchmark
speed:
../../addins/embench-iot/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1
$(embench_dir)/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeed.json
# deletes all files
clean:
rm -rf ../../addins/embench-iot/bd_speed/
rm -rf ../../addins/embench-iot/bd_size/
rm -rf $(embench_dir)/bd_speed/
rm -rf $(embench_dir)/bd_size/
allclean: clean
rm -rf ../../addins/embench-iot/logs/
rm -rf $(embench_dir)/logs/
# riscv64-unknown-elf-gcc -O2 -g -nostartfiles -I/home/harris/riscv-wally/addins/embench-iot/support -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/boards/ri5cyverilator -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32/chips/generic -I/home/harris/riscv-wally/addins/embench-iot/config/riscv32 -DCPU_MHZ=1 -DWARMUP_HEAT=1 -o main.o /home/harris/riscv-wally/addins/embench-iot/support/main.c

81
benchmarks/graphGen.py Executable file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python
import subprocess
import sys
import json
import plotly.graph_objects as go
coremarkData = {}
embenchData = {}
debug = True
def loadCoremark():
"""loads the coremark data dictionary"""
coremarkPath = "riscv-coremark/work/coremark.sim.log"
keywordlist = ["CoreMark 1.0", "CoreMark Size", "MTIME", "MINSTRET", "Branches Miss Predictions", "BTB Misses"]
for keyword in keywordlist:
bashInst = "cat " + coremarkPath + " | grep \"" + keyword + "\" | cut -d \':\' -f 2 | cut -d \" \" -f 2 | tail -1"
result = subprocess.run(bashInst, stdout=subprocess.PIPE, shell=True)
if (debug): print(result)
coremarkData[keyword] = int(result.stdout)
if (debug): print(coremarkData)
return coremarkData
def loadEmbench():
"""loads the embench data dictionary"""
embenchPath = "embench/wallySpeed.json"
f = open(embenchPath)
embenchData = json.load(f)
if (debug): print(embenchData)
return embenchData
def graphEmbench(embenchData):
ydata = list(embenchData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchData["speed results"]["detailed speed results"].values()) + [embenchData["speed results"]["speed geometric mean"],embenchData["speed results"]["speed geometric sd"],embenchData["speed results"]["speed geometric range"]]
fig = go.Figure(go.Bar(
y=ydata,
x=xdata,
orientation='h'))
fig.show()
def main():
coremarkData = loadCoremark()
embenchData = loadEmbench()
graphEmbench(embenchData)
if __name__ == '__main__':
sys.exit(main())
# x =
# y =
# df = px.data.tips()
# fig = px.bar(df, x="total_bill", y="day", orientation='h')
# fig.show()
# import plotly.express as px
# result = sp.run(['ls', '-l'], stdout=sp.PIPE)
# result.stdout
# fig = go.Figure( go.Bar(
# x=[],
# y=[],
# color="species",
# facet_col="species",
# title="Using update_traces() With Plotly Express Figures"),
# orientation='h')
# fig.show()
#
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log
# "ls -Art ../addins/embench-iot/logs/*size* | tail -n 1 " # gets most recent embench speed log
## get coremark score
# cat coremarkPath | grep "CoreMark 1.0" | cut -d ':' -f 2 | cut -d " " -f 2
# cat coremarkPath | grep "MTIME" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
# cat coremarkPath | grep "MINSTRET" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1

View File

@ -166,17 +166,17 @@ void _init(int cid, int nc)
counters[11] = read_csr(mhpmcounter11) - counters[11];
counters[12] = read_csr(mhpmcounter12) - counters[12];
ee_printf("Load Stalls %d\n", counters[3]);
ee_printf("D-Cache Accesses %d\n", counters[11]);
ee_printf("D-Cache Misses %d\n", counters[12]);
ee_printf("Branches %d\n", counters[5]);
ee_printf("Branches Miss Predictions %d\n", counters[4]);
ee_printf("BTB Misses %d\n", counters[6]);
ee_printf("Jump, JAL, JALR %d\n", counters[7]);
ee_printf("RAS Wrong %d\n", counters[8]);
ee_printf("Returns %d\n", counters[9]);
ee_printf("BP Class Wrong %d\n", counters[10]);
ee_printf("Done printing performance counters\n");
ee_printf("Load Stalls : %d\n", counters[3]);
ee_printf("D-Cache Accesses : %d\n", counters[11]);
ee_printf("D-Cache Misses : %d\n", counters[12]);
ee_printf("Branches : %d\n", counters[5]);
ee_printf("Branches Miss Predictions : %d\n", counters[4]);
ee_printf("BTB Misses : %d\n", counters[6]);
ee_printf("Jump, JAL, JALR : %d\n", counters[7]);
ee_printf("RAS Wrong : %d\n", counters[8]);
ee_printf("Returns : %d\n", counters[9]);
ee_printf("BP Class Wrong : %d\n", counters[10]);
ee_printf("Done printing performance counters : \n");
exit(ret);
}

View File

@ -0,0 +1,23 @@
# Makefile
CC = gcc
CFLAGS = -O3
LIBS = -lm
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
SRCS = $(wildcard *.c)
PROGS = $(patsubst %.c,%,$(SRCS))
all: $(PROGS)
%: %.c
$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)
clean:
rm -f $(PROGS)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
# fma.do
#
# run with vsim -do "do fma.do"
# add -c before -do for batch simulation
onbreak {resume}
# create library
vlib worklib
vlog -lint -sv -work worklib fma16.v testbench.v
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt
add wave sim:/testbench_fma16/clk
add wave sim:/testbench_fma16/reset
add wave sim:/testbench_fma16/x
add wave sim:/testbench_fma16/y
add wave sim:/testbench_fma16/z
add wave sim:/testbench_fma16/result
add wave sim:/testbench_fma16/rexpected
run -all

View File

@ -0,0 +1,268 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
`define FFLEN 16
`define Nf 10
`define Ne 5
`define BIAS 15
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)
`define NaN 16'h7E00
`define INF 15'h7C00
// rounding modes *** update
`define RZ 3'b00
`define RNE 3'b01
`define RM 3'b10
`define RP 3'b11
module fma16(
input logic [`FFLEN-1:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [`FFLEN-1:0] result);
logic [`Nf:0] xm, ym, zm; // U1.Nf
logic [`Ne-1:0] xe, ye, ze; // B_Ne
logic xs, ys, zs;
logic zs1; // sign before optional negation
logic [2*`Nf+1:0] pm; // U2.2Nf
logic [`Ne:0] pe; // B_Ne+1
logic ps; // sign of product
logic [22:0] rm;
logic [`Ne+1:0] re;
logic rs;
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
logic [`Ne+1:0] re2;
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
module mult16(
input logic mul,
input logic [`Nf:0] xm, ym,
input logic [`Ne-1:0] xe, ye,
input logic xs, ys,
output logic [2*`Nf+1:0] pm,
output logic [`Ne:0] pe,
output logic ps);
// only multiply if mul = 1
assign pm = mul ? xm * ym : {1'b0, xm, 10'b0}; // multiply mantiassas
assign pe = mul ? xe + ye - `BIAS : {1'b0, xe}; // add exponents, account for bias
assign ps = xs ^ ys; // negative if X xor Y are negative
endmodule
module add16(
input logic add,
input logic [2*`Nf+1:0] pm, // U2.2Nf
input logic [`Nf:0] zm, // U1.Nf
input logic [`Ne:0] pe, // B_Ne+1
input logic [`Ne-1:0] ze, // B_Ne
input logic ps, zs,
input logic negz,
output logic [22:0] rm,
output logic [`Ne+1:0] re, // B_Ne+2
output logic [`Ne+1:0] re2,
output logic rs);
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
logic [`Nf-1:0] prezsticky;
logic zsticky;
logic effectivesub;
logic rs0;
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
logic [`Ne:0] re1;
// Alignment shift
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
always_comb // AlignCount mux; see Muller page 254
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
else begin AlignCnt = 0; re = {2'b0, ze}; end
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
// Effective subtraction
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
// Adder
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
assign rs0 = r[`Nf*3+7]; // sign of the initial result
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
// Sign Logic
assign rs = ps ^ rs0; // flip the sign if necessary
// Leading zero counter
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
// Normalization shift
always_comb // NormCount mux
if (ExpDiff < 3) begin
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
assign rnormed = r2 << NormCnt; // *** update sticky
/* temporarily comment out to start synth
// One-bit secondary normalization
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
else begin // *** handle sticky
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
else begin rnormed2 = rnormed << 1; re2 = re-1; end
end
// round
assign l = rnormed2[***]; // least significant bit
assign r = rnormed2[***-1]; // rounding bit
assign s = ***; // sticky bit
always_comb
case (roundmode)
RZ: roundup = 0;
RP: roundup = ~rs & (r | s);
RM: roundup = rs & (r | s);
RNE: roundup = r & (s | l);
default: roundup = 0;
endcase
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
*/
// *** need to handle rounding to MAXNUM vs. INFINITY
// add or pass product through
/* assign rm = add ? arm : {1'b0, pm};
assign re = add ? are : {1'b0, pe};
assign rs = add ? ars : ps; */
endmodule
module lzc(
input logic [`Nf*3+7:0] r2,
output logic [`Ne:0] leadingzeros
);
endmodule
module postproc16(
input logic [1:0] roundmode,
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
input logic [22:0] rm,
input logic [`Nf:0] zm, // U1.Nf
input logic [6:0] re,
input logic [`Ne-1:0] ze, // B_Ne
input logic rs, zs, ps,
input logic [`Ne+1:0] re2,
output logic [15:0] result);
logic [9:0] uf, uff;
logic [6:0] ue;
logic [6:0] ueb, uebiased;
logic invalid;
// Special cases
// *** not handling signaling NaN
// *** also add overflow/underflow/inexact
always_comb begin
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
end
always_comb
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
ue = re + 7'b1;
uf = rm[20:11];
end else begin // no normalization shift needed
ue = re;
uf = rm[19:10];
end
// overflow
always_comb begin
ueb = ue-7'd15;
if (ue >= 7'd46) begin // overflow
/* uebiased = 7'd30;
uff = 10'h3ff; */
end else begin
uebiased = ue-7'd15;
uff = uf;
end
end
assign result = {rs, uebiased[4:0], uff};
// add special case handling for zeros, NaN, Infinity
endmodule
module signadj16(
input logic negr, negz,
input logic xs, ys, zs1,
output logic ps, zs);
assign ps = xs ^ ys; // sign of product
assign zs = zs1 ^ negz; // sign of addend
endmodule
module unpack16(
input logic [15:0] x, y, z,
output logic [10:0] xm, ym, zm,
output logic [4:0] xe, ye, ze,
output logic xs, ys, zs,
output logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
unpacknum16 upx(x, xm, xe, xs, xzero, xinf, xnan);
unpacknum16 upy(y, ym, ye, ys, yzero, yinf, ynan);
unpacknum16 upz(z, zm, ze, zs, zzero, zinf, znan);
endmodule
module unpacknum16(
input logic [15:0] num,
output logic [10:0] m,
output logic [4:0] e,
output logic s,
output logic zero, inf, nan);
logic [9:0] f; // fraction without leading 1
logic [4:0] eb; // biased exponent
assign {s, eb, f} = num; // pull bit fields out of floating-point number
assign m = {1'b1, f}; // prepend leading 1 to fraction
assign e = eb; // leave bias in exponent ***
assign zero = (e == 0 && f == 0);
assign inf = (e == 31 && f == 0);
assign nan = (e == 31 && f != 0);
endmodule

View File

@ -0,0 +1,24 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
module fma16(
input logic [15:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rp, 11: rn
output logic [15:0] result);
endmodule

View File

@ -0,0 +1,240 @@
#include <stdio.h>
#include <stdint.h>
#include "softfloat.h"
#include "softfloat_types.h"
typedef union sp {
float32_t v;
float f;
} sp;
// lists of tests, terminated with 0x8000
uint16_t easyExponents[] = {15, 0x8000};
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
uint16_t zeros[] = {0x0000, 0x8000};
uint16_t infs[] = {0x7C00, 0xFC00};
uint16_t nans[] = {0x7D00, 0x7D01};
void softfloatInit(void) {
softfloat_roundingMode = softfloat_round_minMag;
softfloat_exceptionFlags = 0;
softfloat_detectTininess = softfloat_tininess_beforeRounding;
}
float convFloat(float16_t f16) {
float32_t f32;
float res;
sp r;
f32 = f16_to_f32(f16);
r.v = f32;
res = r.f;
return res;
}
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
float16_t result;
int op, flagVals;
char calc[80], flags[80];
float32_t x32, y32, z32, r32;
float xf, yf, zf, rf;
float16_t smallest;
if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
if (!add) z.v = 0x0000; // force z to 0 to avoid add
if (negp) x.v ^= 0x8000; // flip sign of x to negate p
if (negz) z.v ^= 0x8000; // flip sign of z to negate z
op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
// printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);
softfloat_exceptionFlags = 0; // clear exceptions
result = f16_mulAdd(x, y, z);
sprintf(flags, "NV: %d OF: %d UF: %d NX: %d",
(softfloat_exceptionFlags >> 4) % 2,
(softfloat_exceptionFlags >> 2) % 2,
(softfloat_exceptionFlags >> 1) % 2,
(softfloat_exceptionFlags) % 2);
// pack these four flags into one nibble, discarding DZ flag
flagVals = softfloat_exceptionFlags & 0x7 | ((softfloat_exceptionFlags >> 1) & 0x8);
// convert to floats for printing
xf = convFloat(x);
yf = convFloat(y);
zf = convFloat(z);
rf = convFloat(result);
if (mul)
if (add) sprintf(calc, "%f * %f + %f = %f", xf, yf, zf, rf);
else sprintf(calc, "%f * %f = %f", xf, yf, rf);
else sprintf(calc, "%f + %f = %f", xf, zf, rf);
// omit denorms, which aren't required for this project
smallest.v = 0x0400;
float16_t resultmag = result;
resultmag.v &= 0x7FFF; // take absolute value
if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
FILE *fptr, int *numCases) {
int i, j;
fprintf(fptr, desc); fprintf(fptr, "\n");
*numCases=0;
for (i=0; e[i] != 0x8000; i++)
for (j=0; f[j] != 0x8000; j++) {
cases[*numCases].v = f[j] | e[i]<<10;
*numCases = *numCases + 1;
}
}
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
z.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<=sgn; k++) {
y.v ^= (k<<15);
genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
y.v = 0x0000;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
z.v = cases[j].v;
for (k=0; k<=sgn; k++) {
z.v ^= (k<<15);
genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
fclose(fptr);
}
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, l, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (l=0; l<=sgn; l++) {
z.v ^= (l<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
fclose(fptr);
}
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
int i, j, k, sx, sy, sz, numCases;
float16_t x, y, z;
float16_t cases[100000];
FILE *fptr;
char fn[80];
sprintf(fn, "work/%s.tv", testName);
fptr = fopen(fn, "w");
prepTests(e, f, testName, desc, cases, fptr, &numCases);
cases[numCases].v = 0x0000; // add +0 case
cases[numCases+1].v = 0x8000; // add -0 case
numCases += 2;
for (i=0; i < numCases; i++) {
x.v = cases[i].v;
for (j=0; j<numCases; j++) {
y.v = cases[j].v;
for (k=0; k<numCases; k++) {
z.v = cases[k].v;
for (sx=0; sx<=sgn; sx++) {
x.v ^= (sx<<15);
for (sy=0; sy<=sgn; sy++) {
y.v ^= (sy<<15);
for (sz=0; sz<=sgn; sz++) {
z.v ^= (sz<<15);
genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
}
}
}
}
}
}
fclose(fptr);
}
int main()
{
softfloatInit(); // configure softfloat modes
// Test cases: multiplication
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: addition
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: FMA
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: Zero, Infinity, NaN
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
// Full test cases with other rounding modes
softfloat_roundingMode = softfloat_round_near_even;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
softfloat_roundingMode = softfloat_round_min;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
softfloat_roundingMode = softfloat_round_max;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
return 0;
}

8
examples/verilog/fma/lint-fma Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
export PATH=$PATH:/usr/local/bin/
verilator=`which verilator`
basepath=$(dirname $0)/..
$verilator --lint-only --top-module fma16 fma16.v

2
examples/verilog/fma/sim-fma Executable file
View File

@ -0,0 +1,2 @@
vsim -do "do fma.do"

View File

@ -0,0 +1 @@
vsim -c -do "do fma.do"

1
examples/verilog/fma/synth Executable file
View File

@ -0,0 +1 @@
make -C ../../../synthDC synth DESIGN=fma16

View File

@ -0,0 +1,52 @@
/* verilator lint_off STMTDLY */
module testbench_fma16;
reg clk, reset;
reg [15:0] x, y, z, rexpected;
wire [15:0] result;
reg [7:0] ctrl;
reg [3:0] flagsexpected;
reg mul, add, negp, negz;
reg [1:0] roundmode;
reg [31:0] vectornum, errors;
reg [75:0] testvectors[10000:0];
// instantiate device under test
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
// generate clock
always
begin
clk = 1; #5; clk = 0; #5;
end
// at start of test, load vectors and pulse reset
initial
begin
$readmemh("work/fmul_0.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1; #22; reset = 0;
end
// apply test vectors on rising edge of clk
always @(posedge clk)
begin
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
{roundmode, mul, add, negp, negz} = ctrl[5:0];
end
// check results on falling edge of clk
always @(negedge clk)
if (~reset) begin // skip during reset
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
$display("Error: inputs %h * %h + %h", x, y, z);
$display(" result = %h (%h expected)", result, rexpected);
errors = errors + 1;
end
vectornum = vectornum + 1;
if (testvectors[vectornum] === 'x) begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end
endmodule

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
use strict;
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
foreach my $mode (@roundingmodes) {
push(@names, "f16_${name}_$mode.tv");
}
}
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
my $datestring = localtime();
print(TORTURE "// Torture tests generated $datestring by $0\n");
foreach my $tv (@names) {
open(TV, "work/$tv") || die("Can't read $tv");
my $type = &getType($tv); # is it mul, add, mulAdd
my $rm = &getRm($tv); # rounding mode
# if ($rm != 0) { next; } # only do rz
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
my $linecount = 0;
my $babyTorture = 0;
while (<TV>) {
my $line = $_;
$linecount++;
my $density = 10;
if ($type eq "mulAdd") {$density = 500;}
if ($babyTorture) {
$density = 100;
if ($type eq "mulAdd") {$density = 50000;}
}
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
chomp($line); # strip off newline
my @parts = split(/_/, $line);
my ($x, $y, $z, $op, $w, $flags);
$x = $parts[0];
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
$op = $rm << 4;
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
my $opname = sprintf("%02x", $op);
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
$flags = substr($flags, -1); # take last character
if (&fpval($w) eq "NaN") { $w = "7e00"; }
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
my $skip = "";
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
$skip = "Skipped denorm";
}
my $summary = &summary($x, $y, $z, $w, $type);
if ($skip ne "") {
print TORTURE "// $skip $tv line $linecount $line $summary\n"
}
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
}
close(TV);
}
close(TORTURE);
sub fpval {
my $val = shift;
$val = hex($val); # convert hex string to number
my $frac = $val & 0x3FF;
my $exp = ($val >> 10) & 0x1F;
my $sign = $val >> 15;
my $res;
if ($exp == 31 && $frac != 0) { return "NaN"; }
elsif ($exp == 31) { $res = "INF"; }
elsif ($val == 0) { $res = 0; }
elsif ($exp == 0) { $res = "Denorm"; }
else { $res = sprintf("1.%011b x 2^%d", $frac, $exp-15); }
if ($sign == 1) { $res = "-$res"; }
return $res;
}
sub summary {
my $x = shift; my $y = shift; my $z = shift; my $w = shift; my $type = shift;
my $xv = &fpval($x);
my $yv = &fpval($y);
my $zv = &fpval($z);
my $wv = &fpval($w);
if ($type eq "add") { return "$xv + $zv = $wv"; }
elsif ($type eq "mul") { return "$xv * $yv = $wv"; }
else {return "$xv * $yv + $zv = $wv"; }
}
sub getType {
my $tv = shift;
if ($tv =~ /mulAdd/) { return("mulAdd"); }
elsif ($tv =~ /mul/) { return "mul"; }
else { return "add"; }
}
sub getRm {
my $tv = shift;
if ($tv =~ /rz/) { return 0; }
elsif ($tv =~ /rne/) { return 1; }
elsif ($tv =~ /rd/) {return 2; }
elsif ($tv =~ /ru/) { return 3; }
else { return "bad"; }
}
sub isdenorm {
my $fp = shift;
my $val = hex($fp);
my $expv = $val >> 10;
$expv = $expv & 0x1F;
my $denorm = 0;
if ($expv == 0 && $val != 0) { $denorm = 1;}
# my $e0 = ($expv == 0);
# my $vn0 = ($val != 0);
# my $denorm = 0; #($exp == 0 && $val != 0); # denorm exponent but not all zero
# print("Num $fp Exp $expv Denorm $denorm Done\n");
return $denorm;
}

View File

@ -0,0 +1,62 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench_fma16/clk
add wave -noupdate /testbench_fma16/reset
add wave -noupdate /testbench_fma16/x
add wave -noupdate /testbench_fma16/y
add wave -noupdate /testbench_fma16/z
add wave -noupdate /testbench_fma16/result
add wave -noupdate /testbench_fma16/rexpected
add wave -noupdate /testbench_fma16/dut/x
add wave -noupdate /testbench_fma16/dut/y
add wave -noupdate /testbench_fma16/dut/z
add wave -noupdate /testbench_fma16/dut/mul
add wave -noupdate /testbench_fma16/dut/add
add wave -noupdate /testbench_fma16/dut/negr
add wave -noupdate /testbench_fma16/dut/negz
add wave -noupdate /testbench_fma16/dut/roundmode
add wave -noupdate /testbench_fma16/dut/result
add wave -noupdate /testbench_fma16/dut/XManE
add wave -noupdate /testbench_fma16/dut/YManE
add wave -noupdate /testbench_fma16/dut/ZManE
add wave -noupdate /testbench_fma16/dut/XExpE
add wave -noupdate /testbench_fma16/dut/YExpE
add wave -noupdate /testbench_fma16/dut/ZExpE
add wave -noupdate /testbench_fma16/dut/PExpE
add wave -noupdate /testbench_fma16/dut/Ne
add wave -noupdate /testbench_fma16/dut/upOneExt
add wave -noupdate /testbench_fma16/dut/XSgnE
add wave -noupdate /testbench_fma16/dut/YSgnE
add wave -noupdate /testbench_fma16/dut/ZSgnE
add wave -noupdate /testbench_fma16/dut/PSgnE
add wave -noupdate /testbench_fma16/dut/ProdManE
add wave -noupdate /testbench_fma16/dut/NfracS
add wave -noupdate /testbench_fma16/dut/ProdManAl
add wave -noupdate /testbench_fma16/dut/ZManExt
add wave -noupdate /testbench_fma16/dut/ZManAl
add wave -noupdate /testbench_fma16/dut/Nfrac
add wave -noupdate /testbench_fma16/dut/res
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt
add wave -noupdate /testbench_fma16/dut/NSamt
add wave -noupdate /testbench_fma16/dut/ZExpGreater
add wave -noupdate /testbench_fma16/dut/ACLess
add wave -noupdate /testbench_fma16/dut/upOne
add wave -noupdate /testbench_fma16/dut/KillProd
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2
configure wave -namecolwidth 237
configure wave -valuecolwidth 64
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {4083 ns} {4235 ns}

View File

@ -55,20 +55,22 @@
`define Q_NE 32'd15
`define Q_NF 32'd112
`define Q_BIAS 32'd16383
`define Q_FMT 2'd3
`define D_LEN 32'd64
`define D_NE 32'd11
`define D_NF 32'd52
`define D_BIAS 32'd1023
`define D_FMT 32'd1
`define D_FMT 2'd1
`define S_LEN 32'd32
`define S_NE 32'd8
`define S_NF 32'd23
`define S_BIAS 32'd127
`define S_FMT 32'd1
`define S_FMT 2'd0
`define H_LEN 32'd16
`define H_NE 32'd5
`define H_NF 32'd10
`define H_BIAS 32'd15
`define H_FMT 2'd2
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
@ -91,6 +93,12 @@
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */

View File

@ -1 +0,0 @@
vsim -c -do wally-coremark.do

View File

@ -9,4 +9,4 @@
# sqrt - test square ro
# all - test everything
vsim -do "do testfloat.do rv64fpquad cmp"
vsim -do "do testfloat.do rv64fp mul"

View File

@ -1,45 +0,0 @@
# wally-coremark.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# Use this wally-coremark.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-coremark.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-coremark.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# default to config/coremark, but allow this to be overridden at the command line. For example:
#vlog +incdir+../config/coremark_bare +incdir+../config/shared ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
vlog +incdir+../config/rv64gc +incdir+../config/shared ../testbench/testbench-coremark_bare.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
vopt +acc work.testbench -o workopt
vsim workopt
mem load -startaddress 268435456 -endaddress 268566527 -filltype value -fillradix hex -filldata 0 /testbench/dut/uncore/ram/ram/RAM
#add log -recursive /*
do wave.do
run -all
#run 21400
#quit

View File

@ -1,502 +0,0 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/reset
add wave -noupdate /testbench/test
add wave -noupdate /testbench/memfilename
add wave -noupdate /testbench/dut/core/SATP_REGW
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/PCE
add wave -noupdate -group {Execution Stage} /testbench/InstrEName
add wave -noupdate -group {Execution Stage} /testbench/dut/core/ifu/InstrE
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/priv/trap/InstrValidM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM
add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/InstrM
add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/MemAdrM
add wave -noupdate /testbench/dut/core/ieu/dp/ResultM
add wave -noupdate /testbench/dut/core/ieu/dp/ResultW
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/IllegalInstrFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/BreakpointFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoMisalignedFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StoreAmoAccessFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/EcallFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InstrPageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/LoadPageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/StorePageFaultM
add wave -noupdate -expand -group HDU -group traps /testbench/dut/core/priv/trap/InterruptM
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/PendingIntsM
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/CommittedM
add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/core/priv/trap/InstrValidM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/BPPredWrongE
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/CSRWritePendingDEM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/RetM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/TrapM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LoadStallD
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/StoreStallD
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/ICacheStallF
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/hzu/LSUStallM
add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/core/MulDivStallD
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/hzu/FlushF
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM
add wave -noupdate -expand -group HDU -group Flush -color Yellow /testbench/dut/core/FlushW
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallF
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallD
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallE
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallM
add wave -noupdate -expand -group HDU -group Stall -color Orange /testbench/dut/core/StallW
add wave -noupdate -group Bpred -color Orange /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHR
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]}
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPInstrClassE[0]}
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPPredDirWrongE
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} -divider {class check}
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightNonCFI
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongCFI
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassWrongNonCFI
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPRight
add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/BPClassRightBPWrong
add wave -noupdate -group Bpred -radix hexadecimal -childformat {{{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} -radix binary} {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} -radix binary}} -subitemconfig {{/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[6]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[5]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[4]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[3]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[2]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[1]} {-height 16 -radix binary} {/testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel[0]} {-height 16 -radix binary}} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRMuxSel
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRNext
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRUpdateEN
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr0
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr1
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateEN
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRLookup
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCNextF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/RA1
add wave -noupdate -group Bpred -expand -group prediction -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBValidF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BPInstrClassF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/BTBPredPCF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/LookUpPCIndex
add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/TargetPC
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex -radix binary /testbench/dut/core/ifu/bpred/bpred/BPPredE
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE
add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/BPPredDirWrongE
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePCIndex
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateEN
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdatePC
add wave -noupdate -group Bpred -expand -group update -expand -group BTB /testbench/dut/core/ifu/bpred/bpred/TargetPredictor/UpdateTarget
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHTUpdateAdr
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PCE
add wave -noupdate -group Bpred -expand -group update -expand -group direction /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/WA1
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/TargetWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/FallThroughWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionPCWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/InstrClassE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/PredictionInstrClassWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredClassNonCFIWrongE
add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -group Bpred /testbench/dut/core/ifu/bpred/bpred/BPPredWrongE
add wave -noupdate -group {instruction pipeline} /testbench/InstrFName
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/bus/icache/FinalInstrRawF
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrE
add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrM
add wave -noupdate -group {instruction pipeline} /testbench/InstrW
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredPCF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext0F
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNext1F
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/SelBPPredF
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/BPPredWrongE
add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PrivilegedChangePCM
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ifu/InstrD
add wave -noupdate -group {Decode Stage} /testbench/InstrDName
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/c/RegWriteD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/RdD
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs1D
add wave -noupdate -group {Decode Stage} /testbench/dut/core/ieu/dp/Rs2D
add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a3
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd1
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd2
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/we3
add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/wd3
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ReadDataW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/CSRReadValW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultSrcW
add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/A
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/B
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ALUControl
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/result
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/FlagsE
add wave -noupdate -group alu -divider internals
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/overflow
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/carry
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/zero
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/neg
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/lt
add wave -noupdate -group alu /testbench/dut/core/ieu/dp/alu/ltu
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1D
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2D
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs1E
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/Rs2E
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdE
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdM
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RdW
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/MemReadE
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteM
add wave -noupdate -group Forward /testbench/dut/core/ieu/fw/RegWriteW
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardAE
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/ForwardBE
add wave -noupdate -group Forward -color Thistle /testbench/dut/core/ieu/fw/LoadStallD
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/WriteDataE
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/ALUResultE
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcAE
add wave -noupdate -group {alu execution stage} /testbench/dut/core/ieu/dp/SrcBE
add wave -noupdate -group PCS /testbench/dut/core/ifu/PCNextF
add wave -noupdate -group PCS /testbench/dut/core/PCF
add wave -noupdate -group PCS /testbench/dut/core/ifu/PCD
add wave -noupdate -group PCS /testbench/dut/core/PCE
add wave -noupdate -group PCS /testbench/dut/core/PCM
add wave -noupdate -group PCS /testbench/PCW
add wave -noupdate -group muldiv /testbench/dut/core/mdu/Funct3E
add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivE
add wave -noupdate -group muldiv /testbench/dut/core/mdu/W64E
add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallM
add wave -noupdate -group muldiv /testbench/dut/core/mdu/StallW
add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushM
add wave -noupdate -group muldiv /testbench/dut/core/mdu/FlushW
add wave -noupdate -group muldiv /testbench/dut/core/mdu/MulDivResultW
add wave -noupdate -group muldiv /testbench/dut/core/mdu/DivBusyE
add wave -noupdate -group icache -color Gold /testbench/dut/core/ifu/bus/icache/controller/CurrState
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/HitWay
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/VictimWay
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/WriteEnable}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/SetValid}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/ValidBits}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/core/ifu/bus/icache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/controller/NextState
add wave -noupdate -group icache /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ITLBWriteF
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/ReadLineF
add wave -noupdate -group icache /testbench/dut/core/ifu/bus/icache/BasePAdrF
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/hit
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spill
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/ICacheStallF
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/spillSave
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntReset
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/PreCntEn
add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/core/ifu/bus/icache/controller/CntEn
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrPAdrF
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/InstrInF
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/FetchCountFlag
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/FetchCount
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrReadF
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/InstrAckF
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/controller/ICacheMemWriteEnable
add wave -noupdate -group icache -expand -group memory /testbench/dut/core/ifu/bus/icache/ICacheBusWriteData
add wave -noupdate -group AHB -color Gold /testbench/dut/core/ebu/BusState
add wave -noupdate -group AHB /testbench/dut/core/ebu/NextBusState
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/AtomicMaskedM
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/InstrReadF
add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/core/ebu/MemSizeM
add wave -noupdate -group AHB /testbench/dut/core/ebu/HCLK
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESETn
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRDATA
add wave -noupdate -group AHB /testbench/dut/core/ebu/HREADY
add wave -noupdate -group AHB /testbench/dut/core/ebu/HRESP
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDR
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWDATA
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITE
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZE
add wave -noupdate -group AHB /testbench/dut/core/ebu/HBURST
add wave -noupdate -group AHB /testbench/dut/core/ebu/HPROT
add wave -noupdate -group AHB /testbench/dut/core/ebu/HTRANS
add wave -noupdate -group AHB /testbench/dut/core/ebu/HMASTLOCK
add wave -noupdate -group AHB /testbench/dut/core/ebu/HADDRD
add wave -noupdate -group AHB /testbench/dut/core/ebu/HSIZED
add wave -noupdate -group AHB /testbench/dut/core/ebu/HWRITED
add wave -noupdate -group lsu -expand -group {LSU ARB} /testbench/dut/core/lsu/arbiter/SelPTW
add wave -noupdate -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu.bus.dcache/dcachefsm/CurrState
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WalkerPageFaultM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/WriteDataM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWriteEnableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordWriteEnableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWayWriteEnable
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMWordEnable
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SRAMBlockWayWriteEnableM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/SelAdrM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/ReadDataBlockM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/DCacheBusWriteData
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetValid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/SetDirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/SetDirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/WriteWordEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -expand -group Way1Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetValid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/SetDirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -expand -group Way2Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetValid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/SetDirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/CacheTagMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/DirtyBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ValidBits}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[0]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[1]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[2]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/WriteEnable}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/word[3]/CacheDataMem/StoredData}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearValid
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/SetDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/core/lsu.bus.dcache/ClearDirty
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu.bus.dcache/CacheWays[0]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu.bus.dcache/CacheWays[1]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu.bus.dcache/CacheWays[2]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/HitWay}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Valid}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/Dirty}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu.bus.dcache/CacheWays[3]/ReadTag}
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataBlockWayMaskedM
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordM
add wave -noupdate -group lsu -expand -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu.bus.dcache/ReadDataWordMuxM
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimTag
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirtyWay
add wave -noupdate -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu.bus.dcache/VictimDirty
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemRWM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemAdrE
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/MemPAdrM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct3M
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/Funct7M
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/AtomicM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/FlushDCacheM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/CacheableM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/WriteDataM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/ReadDataM
add wave -noupdate -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu.bus.dcache/DCacheStallM
add wave -noupdate -group lsu -expand -group dcache /testbench/dut/core/lsu.bus.dcache/FlushAdrFlag
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/HitWay
add wave -noupdate -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/core/lsu.bus.dcache/CacheHit
add wave -noupdate -group lsu -expand -group dcache -group status /testbench/dut/core/lsu.bus.dcache/FetchCount
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/FetchCountFlag
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBPAdr
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBRead
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBWrite
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/AHBAck
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HRDATA
add wave -noupdate -group lsu -expand -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu.bus.dcache/HWDATA
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/EffectivePrivilegeMode
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/Translate
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/tlbcontrol/DisableTranslation
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBMiss
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/TLBHit
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/PhysicalAddress
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/TLBPageFault
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/LoadAccessFaultM
add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/StoreAmoAccessFaultM
add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBPAdr
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/PTE
add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/genblk1/tlb/TLBWrite
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PhysicalAddress
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/SelRegions
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Cacheable
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/Idempotent
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/AtomicAllowed
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/pmachecker/PMAAccessFault
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAInstrAccessFaultF
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMALoadAccessFaultM
add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/PMAStoreAmoAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPInstrAccessFaultF
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPLoadAccessFaultM
add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/PMPStoreAmoAccessFaultM
add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/genblk1/WalkerState
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PCF
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/genblk1/TranslationVAdr
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/TranslationPAdr
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/HPTWReadPTE
add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/PTE
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBMissF
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBMissM
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/ITLBWriteF
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/DTLBWriteM
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerInstrPageFaultF
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerLoadPageFaultM
add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/WalkerStorePageFaultM
add wave -noupdate -group csr /testbench/dut/core/priv/csr/MIP_REGW
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/TLBWrite
add wave -noupdate -group itlb /testbench/dut/core/ifu/ITLBMissF
add wave -noupdate -group itlb /testbench/dut/core/ifu/immu/PhysicalAddress
add wave -noupdate /testbench/dut/core/lsu.bus.dcache/VAdr
add wave -noupdate /testbench/dut/core/lsu.bus.dcache/MemPAdrM
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HSELPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HADDR
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWRITE
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADY
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HTRANS
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HWDATA
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/UARTIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/GPIOIntr
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HRESPPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HREADYPLIC
add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/ExtIntM
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HCLK
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HSELGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HADDR
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWDATA
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HWRITE
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADY
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HTRANS
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HRESPGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/HREADYGPIO
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsIn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsOut
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOPinsEn
add wave -noupdate -group GPIO /testbench/dut/uncore/gpio/gpio/GPIOIntr
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HCLK
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HSELCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HADDR
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWRITE
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HWDATA
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADY
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HTRANS
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HRESPCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/HREADYCLINT
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIME
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/MTIMECMP
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/TimerIntM
add wave -noupdate -group CLINT /testbench/dut/uncore/clint/clint/SwIntM
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESETn
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HRESPUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/HREADYUART
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/SIN
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DSRb
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/DCDb
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/CTSb
add wave -noupdate -group uart /testbench/dut/uncore/uart/uart/RIb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/SOUT
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RTSb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/DTRb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT1b
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/OUT2b
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/INTR
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/TXRDYb
add wave -noupdate -group uart -expand -group outputs /testbench/dut/uncore/uart/uart/RXRDYb
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HCLK
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE
add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA
add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/CYCLE_REGW
add wave -noupdate -radix unsigned /testbench/dut/core/priv/csr/genblk1/counters/genblk1/INSTRET_REGW
add wave -noupdate -label LoadStall -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[3]}
add wave -noupdate -label {Branch Instr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[5]}
add wave -noupdate -label {BP Dir Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[4]}
add wave -noupdate -label {Jump, Jal, Jalr} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[7]}
add wave -noupdate -label {RAS Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[8]}
add wave -noupdate -label {BTB Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[6]}
add wave -noupdate -label {BP Class Non CFI Wrong} -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[10]}
add wave -noupdate -label DCacheAccess -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[11]}
add wave -noupdate -label DCacheMiss -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[12]}
add wave -noupdate -label Return -radix unsigned {/testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW[9]}
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/HPMCOUNTER_REGW
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/MCOUNTINHIBIT_REGW
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/InstrValidM
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/InstrValidNotFlushedM
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/BPPredDirWrongM
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/genblk1/LoadStallM
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/genblk1/genblk1/NextHPMCOUNTERM
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheMiss
add wave -noupdate /testbench/dut/core/priv/csr/genblk1/counters/DCacheAccess
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 6} {17923831 ns} 0}
quietly wave cursor active 1
configure wave -namecolwidth 250
configure wave -valuecolwidth 297
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ns
update
WaveRestoreZoom {0 ns} {18715695 ns}

View File

@ -67,6 +67,7 @@ add wave -hex /testbench/dut/core/ebu/HTRANS
add wave -hex /testbench/dut/core/ebu/HRDATA
add wave -hex /testbench/dut/core/ebu/HWRITE
add wave -hex /testbench/dut/core/ebu/HWDATA
add wave -hex /testbench/dut/core/ebu/HBURST
add wave -hex /testbench/dut/core/ebu/CaptureDataM
add wave -divider

View File

@ -1,102 +1,9 @@
add wave -noupdate /testbenchfp/clk
add wave -noupdate -radix decimal /testbenchfp/VectorNum
add wave -group Other -noupdate /testbenchfp/FrmNum
add wave -group Other -noupdate /testbenchfp/X
add wave -group Other -noupdate /testbenchfp/Y
add wave -group Other -noupdate /testbenchfp/Z
add wave -group Other -noupdate /testbenchfp/Res
add wave -group Other -noupdate /testbenchfp/Ans
add wave -group Rne -noupdate /testbenchfp/FmaRneX
add wave -group Rne -noupdate /testbenchfp/FmaRneY
add wave -group Rne -noupdate /testbenchfp/FmaRneZ
add wave -group Rne -noupdate /testbenchfp/FmaRneRes
add wave -group Rne -noupdate /testbenchfp/FmaRneAns
add wave -group Rz -noupdate /testbenchfp/FmaRzX
add wave -group Rz -noupdate /testbenchfp/FmaRzY
add wave -group Rz -noupdate /testbenchfp/FmaRzZ
add wave -group Rz -noupdate /testbenchfp/FmaRzRes
add wave -group Rz -noupdate /testbenchfp/FmaRzAns
add wave -group Ru -noupdate /testbenchfp/FmaRuX
add wave -group Ru -noupdate /testbenchfp/FmaRuY
add wave -group Ru -noupdate /testbenchfp/FmaRuZ
add wave -group Ru -noupdate /testbenchfp/FmaRuRes
add wave -group Ru -noupdate /testbenchfp/FmaRuAns
add wave -group Rd -noupdate /testbenchfp/FmaRdX
add wave -group Rd -noupdate /testbenchfp/FmaRdY
add wave -group Rd -noupdate /testbenchfp/FmaRdZ
add wave -group Rd -noupdate /testbenchfp/FmaRdRes
add wave -group Rd -noupdate /testbenchfp/FmaRdAns
add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
add wave -group AllSignals -noupdate /*
add wave -group AllSignals -noupdate /testbenchfp/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
add wave -noupdate /testbenchfp/FrmNum
add wave -noupdate /testbenchfp/X
add wave -noupdate /testbenchfp/Y
add wave -noupdate /testbenchfp/Z
add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans

View File

@ -473,6 +473,7 @@ add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusRead
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAdr
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusAck
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUBusHRDATA
add wave -noupdate -group ifu /testbench/dut/core/ifu/IFUTransComplete
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillF
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/CurrState
add wave -noupdate -group ifu -expand -group spill /testbench/dut/core/ifu/SpillSupport/spillsupport/SpillDataLine0

View File

@ -45,6 +45,10 @@ module ahblite (
input logic IFUBusRead,
output logic [`XLEN-1:0] IFUBusHRDATA,
output logic IFUBusAck,
output logic IFUBusInit,
input logic [2:0] IFUBurstType,
input logic [1:0] IFUTransType,
input logic IFUTransComplete,
// Signals from Data Cache
input logic [`PA_BITS-1:0] LSUBusAdr,
input logic LSUBusRead,
@ -52,7 +56,11 @@ module ahblite (
input logic [`XLEN-1:0] LSUBusHWDATA,
output logic [`XLEN-1:0] LSUBusHRDATA,
input logic [2:0] LSUBusSize,
input logic [2:0] LSUBurstType,
input logic [1:0] LSUTransType,
input logic LSUTransComplete,
output logic LSUBusAck,
output logic LSUBusInit,
// AHB-Lite external signals
(* mark_debug = "true" *) input logic [`AHBW-1:0] HRDATA,
(* mark_debug = "true" *) input logic HREADY, HRESP,
@ -87,6 +95,9 @@ module ahblite (
// Data accesses have priority over instructions. However, if a data access comes
// while an instruction read is occuring, the instruction read finishes before
// the data access can take place.
// *** This is no longer true when adding burst mode. We need to finish the current
// read before doing another read. Need to work this out, but preliminarily we can
// store the current read type in a flop and use that to figure out what burst type to use.
flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextBusState, IDLE, BusState);
@ -100,19 +111,21 @@ module ahblite (
// interface that might be used in place of the ahblite.
always_comb
case (BusState)
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
else if (LSUBusWrite)NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMREAD: if (~HREADY) NextBusState = MEMREAD;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMWRITE: if (~HREADY) NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
INSTRREAD: if (~HREADY) NextBusState = INSTRREAD;
else NextBusState = IDLE; // if (IFUBusRead still high) *** need to wait?
default: NextBusState = IDLE;
IDLE: if (LSUBusRead) NextBusState = MEMREAD; // Memory has priority over instructions
else if (LSUBusWrite) NextBusState = MEMWRITE;
else if (IFUBusRead) NextBusState = INSTRREAD;
else NextBusState = IDLE;
MEMREAD: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
else if (LSUTransComplete) NextBusState = IDLE;
else NextBusState = MEMREAD;
MEMWRITE: if (LSUTransComplete & IFUBusRead) NextBusState = INSTRREAD;
else if (LSUTransComplete) NextBusState = IDLE;
else NextBusState = MEMWRITE;
INSTRREAD: if (IFUTransComplete & LSUBusRead) NextBusState = MEMREAD;
else if (IFUTransComplete & LSUBusWrite) NextBusState = MEMWRITE;
else if (IFUTransComplete) NextBusState = IDLE;
else NextBusState = INSTRREAD;
default: NextBusState = IDLE;
endcase
@ -122,7 +135,7 @@ module ahblite (
assign #1 HADDR = AccessAddress;
assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width; ignored on reads anyway
assign HSIZE = (GrantData) ? {1'b0, LSUBusSize[1:0]} : ISize;
assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfH
assign HBURST = (GrantData) ? LSUBurstType : IFUBurstType; // If doing memory accesses, use LSUburst, else use Instruction burst.
/* Cache burst read/writes case statement (hopefully) WRAPS only have access to 4 wraps. X changes position based on HSIZE.
000: Single (SINGLE)
@ -133,15 +146,16 @@ module ahblite (
101: 8-beat incrementing burst (INCR8)
110: 16-beat wrapping burst (WRAP16) [wraps if X in 0X000000]
111: 16-beat incrementing burst (INCR16)
*/
*** Remove if not necessary
*/
assign HPROT = 4'b0011; // not used; see Section 3.7
assign HTRANS = (NextBusState != IDLE) ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise
assign HTRANS = (GrantData) ? LSUTransType : IFUTransType; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise
assign HMASTLOCK = 0; // no locking supported
assign HWRITE = NextBusState == MEMWRITE;
assign HWRITE = (NextBusState == MEMWRITE);
// delay write data by one cycle for
flop #(`XLEN) wdreg(HCLK, LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
flopen #(`XLEN) wdreg(HCLK, (LSUBusAck | LSUBusInit), LSUBusHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN
// delay signals for subword writes
flop #(3) adrreg(HCLK, HADDR[2:0], HADDRD);
flop #(4) sizereg(HCLK, {UnsignedLoadM, HSIZE}, HSIZED);
@ -153,7 +167,9 @@ module ahblite (
assign IFUBusHRDATA = HRDATA;
assign LSUBusHRDATA = HRDATA;
assign IFUBusAck = (BusState == INSTRREAD) & (NextBusState != INSTRREAD);
assign LSUBusAck = (BusState == MEMREAD) & (NextBusState != MEMREAD) | (BusState == MEMWRITE) & (NextBusState != MEMWRITE);
assign IFUBusInit = (BusState != INSTRREAD) & (NextBusState == INSTRREAD);
assign LSUBusInit = (((BusState != MEMREAD) & (NextBusState == MEMREAD)) | (BusState != MEMWRITE) & (NextBusState == MEMWRITE));
assign IFUBusAck = HREADY & (BusState == INSTRREAD);
assign LSUBusAck = HREADY & ((BusState == MEMREAD) | (BusState == MEMWRITE));
endmodule

View File

@ -0,0 +1,69 @@
`include "wally-config.vh"
module cvtshiftcalc(
input logic XZeroM,
input logic ToInt,
input logic IntToFp,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NF:0] XManM, // input mantissas
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic CvtResDenormUfM,
output logic CvtResUf,
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
);
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | LzcInM | 0's if nessisary |
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
{CvtLzcInM, {`NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
endmodule

View File

@ -2,13 +2,12 @@
`include "wally-config.vh"
// FOpCtrlE values
// 111 min
// 110 min
// 101 max
// 010 equal
// 001 less than
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // see above table
@ -20,12 +19,13 @@ module fcmp (
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
output logic CmpNVE, // invalid flag
output logic [`FLEN-1:0] CmpResE // compare resilt
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
output logic [`XLEN-1:0] CmpIntResE // compare resilt
);
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes;
logic BothZeroE, EitherNaNE, EitherSNaNE;
logic BothZero, EitherNaN, EitherSNaN;
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
@ -36,9 +36,9 @@ module fcmp (
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);
assign BothZeroE = XZeroE&YZeroE;
assign EitherNaNE = XNaNE|YNaNE;
assign EitherSNaNE = XSNaNE|YSNaNE;
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
// flags
@ -47,12 +47,12 @@ module fcmp (
// EQ - quiet - sets invalid if signaling NaN input
always_comb begin
case (FOpCtrlE[2:0])
3'b111: CmpNVE = EitherSNaNE;//min
3'b101: CmpNVE = EitherSNaNE;//max
3'b010: CmpNVE = EitherSNaNE;//equal
3'b001: CmpNVE = EitherNaNE;//less than
3'b011: CmpNVE = EitherNaNE;//less than or equal
default: CmpNVE = 1'b0;
3'b110: CmpNVE = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal
default: CmpNVE = 1'bx;
endcase
end
@ -91,7 +91,7 @@ module fcmp (
`FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = (`FLEN)'(0);
default: NaNRes = {`FLEN{1'bx}};
endcase
else if (`FPSIZES == 4)
@ -112,16 +112,12 @@ module fcmp (
endcase
// when one input is a NaN -output the non-NaN
always_comb
case (FOpCtrlE[2:0])
3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
default: CmpResE = (`FLEN)'(0);
endcase
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
endmodule

View File

@ -10,99 +10,99 @@ module fctrl (
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResultSelD, // select result to be written to fp register
output logic [1:0] FResSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [1:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
output logic [1:0] PostProcSelD,
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
);
`define FCTRLW 13
`define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD;
//*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder
always_comb
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1;
else case(OpD)
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_xx_000_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_00_xx_001_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_00_xx_010_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_xx_110_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_00_xx_101_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_1_00_xx_010_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_00_xx_001_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_00_xx_011_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx__0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])//***reduce resSel
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_xx_000_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.w to int reg
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0; // fmv.x.d to int reg
else ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
endcase
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s s->lu
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
7'b1111000: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.w.x to fp reg
7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
endcase
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d d->lu
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0; // fmv.d.x to fp reg
7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
default: ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -121,82 +121,61 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// FResultSel:
// 000 - ReadRes - load
// 001 - FMARes - FMA and multiply
// 010 - FAddRes - add and fp to fp
// 011 - FDivRes - divide and squareroot
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
// FResSel:
// 00 - SrcA - move to fp register
// 01 - SgnRes - sign injection
// 10 - CmpRes - min/max
// 11 - CvtRes - convert to fp
// FIntResSel:
// 00 - CmpRes - less than, equal, or less than or equal
// 01 - FSrcX - move to int register
// 10 - ClassRes - classify
// 11 - CvtRes - convert to signed/unsigned int
// Final Res Sel:
// fp int
// 00 other cmp
// 01 postproc cvt
// 10 store class
// 11 mv
// OpCtrl values:
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
// post processing Sel:
// 00 cvt
// 01 div
// 10 fma
// cmp
// fmin = ?111
// fmax = ?101
// feq = ?010
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
// Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00
// 001 - negate sign 00
// 010 - xor sign 00
// 011 - mv to fp 01
// 110 - min 10
// 101 - max 10
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, negate product, negate addend}
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.s.d = 0111
// fcvt.d.s = 0111
// Fmt controls the output for fp -> fp
// convert
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// OpCtrl:
// Fma: {not multiply-add?, negate prod?, negate Z?}
// 000 - fmadd
// 001 - fmsub
// 010 - fnmsub
// 011 - fnmadd
// 100 - mul
// 110 - add
// 111 - sub
// Div:
// 0 - ???
// 1 - ???
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
// Cvt Fp: output format
// 10 - to half
// 00 - to single
// 01 - to double
// 11 - to quad
// Cmp: {equal?, less than?}
// 010 - eq
// 001 - lt
// 011 - le
// 110 - min
// 101 - max
// Sgn:
// 00 - sign
// 01 - negate sign
// 10 - xor sign
endmodule

View File

@ -1,8 +1,5 @@
`include "wally-config.vh"
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
module fcvt (
input logic XSgnE, // input's sign
@ -13,14 +10,13 @@ module fcvt (
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
input logic XZeroE, // is the input zero
input logic XDenormE, // is the input denormalized
input logic XInfE, // is the input infinity
input logic XNaNE, // is the input a NaN
input logic XSNaNE, // is the input a signaling NaN
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
output logic [4:0] CvtFlgE // the conversion's flags
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
output logic CvtResDenormUfE,// does the result underflow or is denormalized
output logic CvtResSgnE, // the result's sign
output logic IntZeroE, // is the integer zero?
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
@ -41,34 +37,8 @@ module fcvt (
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CalcExp; // the calculated expoent
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
logic ResDenormUf;// does the result underflow or is denormalized
logic ResUf; // does the result underflow
logic [`LGLEN+`NF:0] Shifted; // the shifted result
logic [`NE-2:0] NewBias; // the bias of the final result
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [`NE-1:0] OldExp; // the old exponent
logic ResSgn; // the result's sign
logic Sticky; // sticky bit - for rounding
logic Round; // round bit - for rounding
logic LSBFrac; // the least significant bit of the fraction - for rounding
logic CalcPlus1; // the calculated plus 1
logic Plus1; // add one to the final result?
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
logic [`NF-1:0] ResFrac; // the result's fraction
logic [`XLEN+1:0] NegRes; // the negation of the result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic Overflow, Underflow, Inexact, Invalid; // flags
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
logic KillRes; // kill the result?
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
@ -97,8 +67,9 @@ module fcvt (
// 1) negate the input if the input is a negitive singed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZeroE = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
// lzc
@ -107,32 +78,16 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | lzcIn | 0's if nessisary |
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
{LzcIn, {`NF+1{1'b0}}};
// kill the shift if it's negitive
// kill the shift if it's negitive
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
@ -144,47 +99,10 @@ module fcvt (
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
// shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
assign Shifted = ShiftIn << ShiftAmt;
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
@ -215,7 +133,7 @@ module fcvt (
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
default: NewBiasToFp = 1'bx;
default: NewBiasToFp = {`NE-1{1'bx}};
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
@ -262,40 +180,11 @@ module fcvt (
// - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent
//
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;
///////////////////////////////////////////////////////////////////////////
@ -307,498 +196,7 @@ module fcvt (
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
///////////////////////////////////////////////////////////////////////////
// rounding
///////////////////////////////////////////////////////////////////////////
endmodule
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// 11 - Plus1
// round to zero - do nothing
// round to -infinity - Plus1 if negative
// round to infinity - Plus1 if positive
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 1x - Plus1
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
if (`FPSIZES == 1) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
end else if (`FPSIZES == 2) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
end else if (`FPSIZES == 3) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
`FMT: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
end
`FMT1: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
end
`FMT2: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
end
default: begin
ToFpSticky = 1'bx;
ToFpRound = 1'bx;
ToFpLSBFrac = 1'bx;
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end else if (`FPSIZES == 4) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
2'h3: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
end
2'h1: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
end
2'h0: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
end
2'h2: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end
always_comb
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn;//round down
3'b011: CalcPlus1 = ~ResSgn;//round up
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// dont round if exact
assign Plus1 = CalcPlus1&(Round|Sticky);
// shift the 1 to the propper position for rounding
// - dont round it converting to integer
if (`FPSIZES == 1) begin
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
end else if (`FPSIZES == 2) begin
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
default: ShiftedPlus1 = 0;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
endcase
end
// kill calcExp if the result is denormalized
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
// trim the result's expoent to size
assign ResExp = FullResExp[`NE-1:0];
///////////////////////////////////////////////////////////////////////////
// flags
///////////////////////////////////////////////////////////////////////////
// calculate the flags
// find the maximum exponent (the exponent and larger overflows)
if (`FPSIZES == 1) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
`FMT: begin
MaxExpFp = {`NE{1'b1}};
end
`FMT1: begin
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end
`FMT2: begin
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
end
default: begin
MaxExpFp = 1'bx;
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
2'h3: begin
MaxExpFp = {`Q_NE{1'b1}};
end
2'h1: begin
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
end
2'h0: begin
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
end
2'h2: begin
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end
// if the result exponent is larger then the maximum possible exponent
// | and the exponent is positive
// | | and the input is not NaN or Infinity
// | | |
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
// if the result is denormalized or underflowed
// | and the result did not round into normal values
// | | and the result is not exact
// | | | and the result isn't NaN
// | | | |
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
// if there were bits thrown away
// | if overflowed or underflowed
// | | and if not a NaN
// | | |
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
// if the result is too small to be represented and not 0
// | and if the result is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// if an input was a singaling NaN(and we're using a FP input)
// |
assign FpInvalid = (XSNaNE&~IntToFp);
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// if the input is NaN or infinity
// | if the integer result overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the result doesn't round to zero
// | | | | or the result rounds up out of bounds
// | | | | and the result didn't underflow
// | | | | |
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive result rounds up out of range
// select the inexact flag to output
assign Invalid = ToInt ? IntInvalid : FpInvalid;
// pack the flags together
// - fp -> int does not set the overflow or underflow flags
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
///////////////////////////////////////////////////////////////////////////
// result selection
///////////////////////////////////////////////////////////////////////////
// determine if you shoould kill the result
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
if (`FPSIZES == 1) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
end else begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
NaNRes = 1'bx;
InfRes = 1'bx;
UfRes = 1'bx;
Res = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
end else begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
end else begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
end
2'h0: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
end else begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
end
2'h2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
end else begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
end
endcase
end
// choose the floating point result
// - if the input is NaN (and using the NaN input) output the NaN result
// - if the input is infinity or the output overflows
// - kill the InfE signal if the input isn't a floating point value
// - if killing the result output the underflow result
// - otherwise output the normal result
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
(XInfE&~IntToFp)|Overflow ? InfRes :
KillRes ? UfRes :
Res;
// *** probably can optimize the negation
// select the overflow integer result
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged result should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive result if needed
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal result (trmined and sign extended if nessisary)
assign CvtIntResE = Invalid ? OfIntRes :
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -34,7 +34,7 @@ module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResultSelM, // the result being selected
input logic [1:0] FResSelM, // the result being selected
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
);
@ -47,10 +47,12 @@ module fhazard(
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
@ -59,7 +61,7 @@ module fhazard(
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
@ -68,7 +70,7 @@ module fhazard(
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W

147
pipelined/src/fpu/flags.sv Normal file
View File

@ -0,0 +1,147 @@
`include "wally-config.vh"
module flags(
input logic XSgnM,
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic Plus1,
input logic InfIn, // is a Inf input being used
input logic XZeroM, YZeroM, // inputs are zero
input logic XNaNM, YNaNM, // inputs are NaN
input logic NaNIn, // is a NaN input being used
input logic Sqrt, // Sqrt?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCalcExpM, // the calculated expoent - Cvt
input logic CvtOp, // conversion opperation?
input logic DivOp, // conversion opperation?
input logic FmaOp, // Fma opperation?
input logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
input logic [`NE+1:0] RoundExp, // exponent of the normalized sum
input logic [1:0] NegResMSBS, // the negitive integer result's most significant bits
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
output logic IntInvalid, Invalid, Overflow, Underflow, // flags used to select the res
output logic [4:0] PostProcFlgM // flags
);
logic SigNaN; // is an input a signaling NaN
logic Inexact; // inexact flag
logic FpInexact; // floating point inexact flag
logic IntInexact; // integer inexact flag
logic FmaInvalid; // integer invalid flag
logic DivInvalid; // integer invalid flag
logic DivByZero;
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
logic ShiftGtIntSz; // is the shift greater than the the integer size (use ResExp to account for possible roundning "shift")
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE];
`FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE];
`D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]);
endcase
// a left shift of intlen+1 is still in range but any more than that is an overflow
// inital: | 64 0's | XLEN |
// | 64 0's | XLEN | << 64
// | XLEN | 00000... |
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
// | or | | or |
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
// | or | | or |
// larger or equal if:
// - any of the bits after the most significan 1 is one
// - the most signifcant in 65 or 33 is still a one in the number and
// one of the later bits is one
assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64));
end
// if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negitive
// | | if the input isnt infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn);
// detecting tininess after rounding
// the exponent is negitive
// | the result is denormalized
// | | the result is normal and rounded from a denorm
// | | | and if given an unbounded exponent the result does not round
// | | | | and if the result is not exact
// | | | | | and if the input isnt infinity or NaN
// | | | | | |
assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);
// Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputed
assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
// if the input is NaN or infinity
// | if the integer res overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the res doesn't round to zero
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
assign DivByZero = YZeroM&DivOp;
// Combine flags
// - to integer results do not set the underflow or overflow flags
assign PostProcFlgM = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
endmodule

View File

@ -30,73 +30,6 @@
`include "wally-config.vh"
module fma(
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
input logic XSgnM, YSgnM, // input signs - memory stage
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
input logic ZDenormE, // is denorm
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
input logic XNaNM, YNaNM, ZNaNM, // is NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
input logic XInfM, YInfM, ZInfM, // is infinity
output logic [`FLEN-1:0] FMAResM, // FMA result
output logic [4:0] FMAFlgM); // FMA flags
//fma/mult/add
// fmadd = 000
// fmsub = 001
// fnmsub = 010 -(a*b)+c
// fnmadd = 011 -(a*b)-c
// fmul = 100
// fadd = 110
// fsub = 111
// signals transfered between pipeline stages
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
logic Mult;
logic ZDenormM;
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
// E/M pipeline registers
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
.FMAResM, .FMAFlgM);
endmodule
//*** in al units before putting into : ? put in a seperate signal
module fma1(
input logic XSgnE, YSgnE, ZSgnE, // input's signs
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
@ -111,7 +44,7 @@ module fma1(
output logic InvZE, // intert Z
output logic ZSgnEffE, // the modified Z sign
output logic PSgnE, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
);
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
@ -151,7 +84,7 @@ module fma1(
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);
// Choose the positive sum and accompanying LZA result.
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
@ -332,7 +265,7 @@ endmodule
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
endmodule
module fma2(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply opperation
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
output logic [`FLEN-1:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [`NF-1:0] ResultFrac; // Result fraction
logic [`NE-1:0] ResultExp; // Result exponent
logic ResultSgn, ResultSgnTmp; // Result sign
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+1:0] NormSum; // normalized sum
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic ResultDenorm; // is the result denormalized
logic Sticky, UfSticky; // Sticky bit
logic CalcPlus1; // do you add or subtract one for rounding
logic UfPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow; // flags
logic Guard, Round; // bits needed to determine rounding
logic UfLSBNormSum; // bits needed to determine rounding for underflow flag
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
.CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
.XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
.FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
// *** use NF where needed
endmodule
module resultsign(
input logic [2:0] FrmM,
input logic PSgnM, ZSgnEffM,
input logic Underflow,
input logic InvZM,
input logic NegSumM,
input logic SumZero,
input logic Mult,
output logic ResultSgnTmp,
output logic ResultSgn
);
logic ZeroSgn;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
endmodule
module normalize(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZDenormM,
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
output logic [`NF+1:0] NormSum, // normalized sum
output logic SumZero, // is the sum zero
output logic NormSumSticky, UfSticky, // sticky bits
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
output logic ResultDenorm // is the result denormalized
);
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
assign SumExpTmp = SumExpTmpTmp;
end else if (`FPSIZES == 2) begin
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: SumExpTmp = SumExpTmpTmp;
`FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
`FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
default: SumExpTmp = `NE+2'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: SumExpTmp = SumExpTmpTmp;
2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
endcase
end
end
// determine if the result is denormalized
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
default: PreResultDenorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
endcase
end
end
// 010. when should be 001.
// - shift left one
// - add one from exp
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
// Normalize the sum
assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
// LZA correction
assign LZAPlus1 = SumShifted[3*`NF+7];
assign LZAPlus2 = SumShifted[3*`NF+8];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];
// Calculate the sticky bit
if (`FPSIZES == 1) begin
assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];
end else if (`FPSIZES == 2) begin
// 3*NF+5 - NF1 - 3
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);
end else if (`FPSIZES == 3) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
(|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));
end else if (`FPSIZES == 4) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
(|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
end
assign UfSticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
// recalculate if the result is denormalized
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
endmodule
module fmaround(
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underlow calculation
input logic [`NF+1:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResultSgnTmp, // the result's sign
output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
output logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`NE-1:0] ResultExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
);
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfGuard; // guard bit used to caluculate underflow
logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract on from the result
logic [`NF-1:0] NormSumTruncated; // the normalized sum trimed to fit the mantissa
logic UfRound;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
if (`FPSIZES == 1) begin
// determine guard, round, and least significant bit of the result
assign Round = NormSum[1];
assign LSBNormSum = NormSum[2];
// used to determine underflow flag
assign UfRound = NormSum[0];
end else if (`FPSIZES == 2) begin
// \/-------------NF---------------,
// | NF1 | 2 | |
// '-------NF1------^
// determine guard, round, and least significant bit of the result
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// used to determine underflow flag
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
`FMT1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF1+1];
LSBNormSum = NormSum[`NF-`NF1+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF1];
end
`FMT2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF2+1];
LSBNormSum = NormSum[`NF-`NF2+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF2];
end
default: begin
Round = 1'bx;
LSBNormSum = 1'bx;
UfRound = 1'bx;
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
2'h1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`D_NF+1];
LSBNormSum = NormSum[`NF-`D_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`D_NF];
end
2'h0: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`S_NF+1];
LSBNormSum = NormSum[`NF-`S_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`S_NF];
end
2'h2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`H_NF+1];
LSBNormSum = NormSum[`NF-`H_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`H_NF];
end
endcase
end
end
// used to determine underflow flag
assign UfLSBNormSum = Round;
// determine sticky
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
assign NormSumTruncated = NormSum[`NF+1:2];
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[`NE-1:0];
endmodule
module fmaflags(
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XZeroM, YZeroM, // inputs are zero
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
output logic Invalid, Overflow, Underflow, // flags used to select the result
output logic [4:0] FMAFlgM // FMA flags
);
logic SigNaN; // is an input a signaling NaN
logic GtMaxExp; // is exponent greater than the maximum
logic UnderflowFlag, Inexact; // flags
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't outputed
if (`FPSIZES == 1) begin
assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
end else if (`FPSIZES == 2) begin
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
`FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
`FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
default: GtMaxExp = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
endcase
end
end
assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal
assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
// - Don't set the underflow flag if an underflowed result isn't outputed
assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
endmodule
module resultselect(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM,
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
input logic CalcPlus1, // rounding bits
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, Underflow, // flags
input logic ResultDenorm, // is the result denormalized
input logic [`NE-1:0] ResultExp, // Result exponent
input logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`FLEN-1:0] FMAResM // FMA final result
);
logic InfSgn;
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
if (`FPSIZES == 1) begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
`FMT1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNResult = (`FLEN)'(0);
YNaNResult = (`FLEN)'(0);
ZNaNResult = (`FLEN)'(0);
InvalidResult = (`FLEN)'(0);
end else begin
XNaNResult = (`FLEN)'(0);
end
OverflowResult = (`FLEN)'(0);
KillProdResult = (`FLEN)'(0);
UnderflowResult = (`FLEN)'(0);
InfResult = (`FLEN)'(0);
NormResult = (`FLEN)'(0);
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
2'h1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
end
if(`IEEE754) begin
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end else begin
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end
endmodule

View File

@ -0,0 +1,127 @@
`include "wally-config.vh"
module fmashiftcalc(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // normalization shift count
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic ZDenormM,
output logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
output logic SumZero, // is the result denormalized - calculated before LZA corection
output logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count
output logic [3*`NF+8:0] FmaShiftIn // is the sum zero
);
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
//*** insert bias-bias simplification in fcvt.sv/phone pictures
// Determine if the sum is zero
assign SumZero = ~(|SumM);
// calculate the sum's exponent
assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
assign ConvNormSumExp = NormSumExp;
end else if (`FPSIZES == 2) begin
assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: ConvNormSumExp = NormSumExp;
`FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
`FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
default: ConvNormSumExp = {`NE+2{1'bx}};
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: ConvNormSumExp = NormSumExp;
2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
endcase
end
end
// determine if the result is denormalized
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
always_comb begin
case (FmtM)
`FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
default: PreResultDenorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp;
assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
always_comb begin
case (FmtM)
2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
endcase
end
end
// 010. when should be 001.
// - shift left one
// - add one from exp
// - if kill prod dont add to exp
// Determine if the result is denormal
// assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
// set and calculate the shift input and amount
assign FmaShiftIn = {3'b0, SumM};
assign FmaShiftAmt = FmaNormCntM+DenormShift;
endmodule

View File

@ -45,17 +45,13 @@ module fpu (
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
);
//*** make everything FLEN at some point
//*** add the 128 bit support to the if statement when needed
//*** make new tests for fp using testfloat that include flag checking and all rounding modes
//*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec
//*** only fma/mul and fp <-> int convert flags have been tested. test the others.
// FPU specifics:
// - uses NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
@ -68,24 +64,24 @@ module fpu (
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic FWriteIntM; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic XSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
@ -95,7 +91,7 @@ module fpu (
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE; // is the input denormalized
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
@ -104,24 +100,43 @@ module fpu (
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
logic FOpCtrlQ;
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
@ -133,10 +148,20 @@ module fpu (
// DECODE STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// |||||||||||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// |||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
.FmtD, .FrmD, .FWriteIntD);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -150,20 +175,31 @@ module fpu (
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
// EXECUTION STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||||||||||||
// |||
// |||
// |||||||||
// |||
// |||
// ||||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
generate
@ -178,7 +214,7 @@ module fpu (
endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
generate
@ -201,21 +237,12 @@ module fpu (
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fma - does multiply, add, and multiply-add instructions
fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE,
.ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE);
// fpdivsqrt using Goldschmidt's iteration
if(`FLEN == 64) begin
@ -245,11 +272,14 @@ module fpu (
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE,
.FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE,
.CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE,
.CvtLzcInE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
@ -269,16 +299,16 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate
// select a result that may be written to the FP register
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = FSrcXE[`XLEN-1:0];
else
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram
@ -286,33 +316,68 @@ module fpu (
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE},
{FRegWriteM, FResultSelM, FrmM, FmtM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// |||||| ||||||
// ||| ||| ||| |||
// ||| ||||| |||
// ||| ||| |||
// ||| |||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM},
{FRegWriteW, FResultSelW, FmtW});
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// ||| |||
// ||| ||| |||
// ||| ||||| |||
// ||| ||| ||| |||
// |||||| ||||||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
@ -328,6 +393,6 @@ module fpu (
endgenerate
// select the result to be written to the FP register
if(`FLEN>=64)
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
endmodule // fpu

View File

@ -46,7 +46,7 @@ module fsgninj (
//
// calculate the result's sign
assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]);
assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
// format final result based on precision
// - uses NaN-blocking format
@ -64,7 +64,7 @@ module fsgninj (
`FMT: SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
`FMT1: SgnResE = {{`FLEN-`LEN1{1'b1}}, ResSgn, FSrcXE[`LEN1-2:0]};
`FMT2: SgnResE = {{`FLEN-`LEN2{1'b1}}, ResSgn, FSrcXE[`LEN2-2:0]};
default: SgnResE = 0;
default: SgnResE = {`FLEN{1'bx}};
endcase
else if (`FPSIZES == 4)

View File

@ -0,0 +1,29 @@
`include "wally-config.vh"
module lzacorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
input logic FmaOp,
input logic [`NE+1:0] ConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results
input logic PreResultDenorm, // is the result denormalized - calculated before LZA corection
input logic KillProdM, // is the product set to zero
input logic SumZero,
output logic [`CORRSHIFTSZ-1:0] CorrShifted, // the shifted sum before LZA correction
output logic [`NE+1:0] SumExp // exponent of the normalized sum
);
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic ResDenorm; // is the result denormalized
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
// LZA correction
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];
assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (ConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &ConvNormSumExp&Shifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResDenorm)}};
// recalculate if the result is denormalized
assign ResDenorm = PreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
endmodule

View File

@ -0,0 +1,46 @@
`include "wally-config.vh"
// convert shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
module normshift(
input logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt, // normalization shift count
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // is the sum zero
output logic [`NORMSHIFTSZ-1:0] Shifted // is the sum zero
);
assign Shifted = ShiftIn << ShiftAmt;
endmodule

View File

@ -0,0 +1,203 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module postprocess(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
);
logic [`NF-1:0] ResFrac; // Result fraction
logic [`NE-1:0] ResExp; // Result exponent
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
logic SumZero; // is the sum zero
logic Sticky; // Sticky bit
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
logic Mult; // multiply opperation
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic IntInvalid, Overflow, Underflow, Invalid; // flags
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`NE+1:0] RoundExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
logic CvtResUf;
logic DivOp;
logic InfIn;
logic ResSgn;
logic NaNIn;
logic UfLSBRes;
logic Sqrt;
logic [`FMTBITS-1:0] OutFmt;
// signals to help readability
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign ToInt = FWriteIntM;
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
assign CvtOp = (PostProcSelM == 2'b00);
assign FmaOp = (PostProcSelM == 2'b10);
assign DivOp = (PostProcSelM == 2'b01);
assign Sqrt = FOpCtrlM[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: FmtM contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
always_comb
case(PostProcSelM)
2'b10: begin // fma
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
end
2'b01: begin //div
ShiftAmt = 0;//{DivShiftAmt};
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
end
default: begin
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
ShiftIn = {`NORMSHIFTSZ{1'bx}};
end
endcase
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
.SumZero, .Shifted, .SumExp, .CorrShifted);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
.InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
.ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid,
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM, .IntInvalid,
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
.FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
endmodule

View File

@ -0,0 +1,282 @@
`include "wally-config.vh"
module resultselect(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn,
input logic XZeroM,
input logic IntZeroM,
input logic NaNIn,
input logic IntToFp,
input logic Int64,
input logic Signed,
input logic CvtOp,
input logic [`NORMSHIFTSZ-1:0] Shifted, // is the sum zero
input logic FmaOp,
input logic Plus1,
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM,
input logic ResSgn, // the res's sign
input logic [`FLEN:0] RoundAdd, // how much to add to the res
input logic IntInvalid, Invalid, Overflow, // flags
input logic CvtResUf,
input logic [`NE-1:0] ResExp, // Res exponent
input logic [`NE+1:0] FullResExp, // Res exponent
input logic [`NF-1:0] ResFrac, // Res fraction
output logic [`FLEN-1:0] PostProcResM, // final res
output logic [1:0] NegResMSBS,
output logic [`XLEN-1:0] FCvtIntResM // final res
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
logic OfResMax;
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic [`XLEN+1:0] NegRes; // the negation of the result
logic KillRes;
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
assign NormRes = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
KillProdRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
// determine if you shoould kill the res - Cvt
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
if(`IEEE754) begin
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
YNaNM&~CvtOp ? YNaNRes :
ZNaNM&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end else begin
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end
///////////////////////////////////////////////////////////////////////////////////////
//
// ||||||||||| ||| ||| |||||||||||||
// ||| |||||| ||| |||
// ||| ||| ||| ||| |||
// ||| ||| |||||| |||
// ||||||||||| ||| ||| |||
//
///////////////////////////////////////////////////////////////////////////////////////
// *** probably can optimize the negation
// select the overflow integer res
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged res should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive res if needed
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
//*** false critical path probably
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow res
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal res (trmined and sign extended if nessisary)
assign FCvtIntResM = IntInvalid ? OfIntRes :
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View File

@ -0,0 +1,50 @@
`include "wally-config.vh"
module resultsign(
input logic [2:0] FrmM,
input logic PSgnM, ZSgnEffM,
input logic InvZM,
input logic ZInfM,
input logic InfIn,
input logic NegSumM,
input logic [1:0] PostProcSelM,
input logic [`NE+1:0] SumExp,
input logic SumZero,
input logic Mult,
input logic Round,
input logic Sticky,
input logic CvtResSgnM,
output logic ResSgn
);
logic ZeroSgn;
logic InfSgn;
logic FmaResSgn;
logic FmaResSgnTmp;
logic Underflow;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// if multiply then Psgn
// otherwise psign
assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky));
assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
// is the result negitive
// if p - z is the Sum negitive
// if -p + z is the Sum positive
// if -p - z then the Sum is negitive
assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM);
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
always_comb
case(PostProcSelM)
2'b10: ResSgn = FmaResSgn; // fma
2'b00: ResSgn = CvtResSgnM; // cvt
2'b01: ResSgn = 0; // divide
default: ResSgn = 1'bx;
endcase
endmodule

316
pipelined/src/fpu/round.sv Normal file
View File

@ -0,0 +1,316 @@
`include "wally-config.vh"
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic ToInt,
input logic CvtOp,
input logic CvtResUf,
input logic [`CORRSHIFTSZ-1:0] CorrShifted,
input logic AddendStickyM, // addend's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
output logic [`NE-1:0] ResExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`NE+1:0] RoundExp,
output logic Plus1,
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, UfLSBRes // bits needed to calculate rounding
);
logic LSBRes; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result
logic NormSumSticky; // normalized sum's sticky bit
logic UfSticky; // sticky bit for underlow calculation
logic [`NF-1:0] RoundFrac;
logic FpRes, IntRes;
logic UfRound;
logic FpRound, FpLSBRes, FpUfRound;
logic CalcPlus1, FpPlus1;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - plus 1 otherwise
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - Plus 1 otherwise
assign IntRes = CvtOp & ToInt;
assign FpRes = ~IntRes;
// sticky bit calculation
if (`FPSIZES == 1) begin
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
end
// only add the Addend sticky if doing an FMA opperation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determint the underflow flag
if (`FPSIZES == 1) begin
assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpRound = 1'bx;
FpLSBRes = 1'bx;
FpUfRound = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign Round = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
assign LSBRes = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
assign UfRound = ToInt&CvtOp ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
// used to determine underflow flag
assign UfLSBRes = FpRound;
// determine sticky
assign Sticky = UfSticky | UfRound;
// Deterimine if a small number was supposed to be subtrated - For Fma calculation only
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (OutFmt)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (OutFmt)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
// determine the result to be roundned
assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
always_comb
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = 0; // divide
default: RoundExp = 0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
assign ResExp = FullResExp[`NE-1:0];
endmodule

View File

@ -98,7 +98,7 @@ module unpackinput (
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
default: BadNaNBox = 0;
default: BadNaNBox = 1'bx;
endcase
// extract the sign bit
@ -107,7 +107,7 @@ module unpackinput (
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
default: Sgn = 0;
default: Sgn = 1'bx;
endcase
// extract the fraction
@ -116,7 +116,7 @@ module unpackinput (
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
default: Frac = 0;
default: Frac = {`NF{1'bx}};
endcase
// is the exponent non-zero
@ -125,7 +125,7 @@ module unpackinput (
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
default: ExpNonZero = 0;
default: ExpNonZero = 1'bx;
endcase
// example double to single conversion:
@ -142,7 +142,7 @@ module unpackinput (
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
default: Exp = 0;
default: Exp = {`NE{1'bx}};
endcase
// is the exponent all 1's
@ -151,7 +151,7 @@ module unpackinput (
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
default: ExpMax = 0;
default: ExpMax = 1'bx;
endcase
end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half

View File

@ -61,6 +61,8 @@ module datapath (
(* mark_debug = "true" *) input logic RegWriteW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [1:0] FResSelW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
@ -120,14 +122,17 @@ module datapath (
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux
logic [`XLEN-1:0] IFCvtResultW;
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else begin:fpmux
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end
// handle Store Conditional result if atomic extension supported

View File

@ -61,6 +61,8 @@ module ieu (
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [1:0] FResSelW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
@ -105,8 +107,8 @@ module ieu (
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(

View File

@ -38,9 +38,13 @@ module ifu (
// Bus interface
(* mark_debug = "true" *) input logic [`XLEN-1:0] IFUBusHRDATA,
(* mark_debug = "true" *) input logic IFUBusAck,
(* mark_debug = "true" *) input logic IFUBusInit,
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUBusAdr,
(* mark_debug = "true" *) output logic IFUBusRead,
(* mark_debug = "true" *) output logic IFUStallF,
(* mark_debug = "true" *) output logic [2:0] IFUBurstType,
(* mark_debug = "true" *) output logic [1:0] IFUTransType,
(* mark_debug = "true" *) output logic IFUTransComplete,
(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF,
// Execute
output logic [`XLEN-1:0] PCLinkE,
@ -201,8 +205,8 @@ module ifu (
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
busdp(.clk, .reset,
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(),
.LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusInit(IFUBusInit), .LSUBusWrite(), .LSUBusWriteCrit(),
.LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUBurstType(IFUBurstType), .LSUTransType(IFUTransType), .LSUTransComplete(IFUTransComplete),
.LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr),
.WordCount(),
.DCacheFetchLine(ICacheFetchLine),

View File

@ -40,9 +40,13 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
// bus interface
input logic [`XLEN-1:0] LSUBusHRDATA,
input logic LSUBusAck,
input logic LSUBusInit,
output logic LSUBusWrite,
output logic LSUBusRead,
output logic [2:0] LSUBusSize,
output logic [2:0] LSUBusSize,
output logic [2:0] LSUBurstType,
output logic [1:0] LSUTransType, // For AHBLite
output logic LSUTransComplete,
input logic [2:0] LSUFunct3M,
output logic [`PA_BITS-1:0] LSUBusAdr, // ** change name to HADDR to make ahb lite.
output logic [LOGWPL-1:0] WordCount,
@ -66,13 +70,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
localparam integer WordCountThreshold = CACHE_ENABLED ? WORDSPERLINE - 1 : 0;
logic [`PA_BITS-1:0] LocalLSUBusAdr;
logic [LOGWPL-1:0] WordCountDelayed;
// *** implement flops as an array if feasbile; DCacheBusWriteData might be a problem
// *** better name than DCacheBusWriteData
genvar index;
for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer
logic [WORDSPERLINE-1:0] CaptureWord;
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCount);
assign CaptureWord[index] = LSUBusAck & LSUBusRead & (index == WordCountDelayed);
flopen #(`XLEN) fb(.clk, .en(CaptureWord[index]), .d(LSUBusHRDATA),
.q(DCacheBusWriteData[(index+1)*`XLEN-1:index*`XLEN]));
end
@ -83,6 +89,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED)
busfsm #(WordCountThreshold, LOGWPL, CACHE_ENABLED) busfsm(
.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine,
.LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
.DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount);
.LSUBusAck, .LSUBusInit, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusWriteCrit, .LSUBusRead,
.LSUBurstType, .LSUTransType, .LSUTransComplete, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount, .WordCountDelayed);
endmodule

View File

@ -41,6 +41,7 @@ module busfsm #(parameter integer WordCountThreshold,
input logic DCacheFetchLine,
input logic DCacheWriteLine,
input logic LSUBusAck,
input logic LSUBusInit, // This might be better as LSUBusLock, or to send this using LSUBusAck.
input logic CPUBusy,
input logic CacheableM,
@ -48,10 +49,13 @@ module busfsm #(parameter integer WordCountThreshold,
output logic LSUBusWrite,
output logic LSUBusWriteCrit,
output logic LSUBusRead,
output logic [2:0] LSUBurstType,
output logic LSUTransComplete,
output logic [1:0] LSUTransType,
output logic DCacheBusAck,
output logic BusCommittedM,
output logic SelUncachedAdr,
output logic [LOGWPL-1:0] WordCount);
output logic [LOGWPL-1:0] WordCount, WordCountDelayed);
@ -61,7 +65,8 @@ module busfsm #(parameter integer WordCountThreshold,
logic CntReset;
logic WordCountFlag;
logic [LOGWPL-1:0] NextWordCount;
logic UnCachedAccess;
logic UnCachedAccess, UnCachedRW;
logic [2:0] LocalBurstType;
typedef enum logic [2:0] {STATE_BUS_READY,
@ -75,18 +80,27 @@ module busfsm #(parameter integer WordCountThreshold,
(* mark_debug = "true" *) busstatetype BusCurrState, BusNextState;
// Used to send address for address stage of AHB.
flopenr #(LOGWPL)
WordCountReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(NextWordCount),
.q(WordCount));
.q(WordCount));
// Used to store data from data phase of AHB.
flopenr #(LOGWPL)
WordCountDelayedReg(.clk(clk),
.reset(reset | CntReset),
.en(CntEn),
.d(WordCount),
.q(WordCountDelayed));
assign NextWordCount = WordCount + 1'b1;
assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]);
assign CntEn = PreCntEn & LSUBusAck;
assign PreCntEn = (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_WRITE);
assign WordCountFlag = (WordCountDelayed == WordCountThreshold[LOGWPL-1:0]); // Detect when we are waiting on the final access.
assign CntEn = (PreCntEn & LSUBusAck | (LSUBusInit)) & ~WordCountFlag & ~UnCachedRW; // Want to count when doing cache accesses and we aren't wrapping up.
assign UnCachedAccess = ~CACHE_ENABLED | ~CacheableM;
@ -120,14 +134,29 @@ module busfsm #(parameter integer WordCountThreshold,
endcase
end
always_comb begin
case(WordCountThreshold)
0: LocalBurstType = 3'b000;
3: LocalBurstType = 3'b011; // INCR4
7: LocalBurstType = 3'b101; // INCR8
15: LocalBurstType = 3'b111; // INCR16
default: LocalBurstType = 3'b001; // INCR without end.
endcase
end
assign CntReset = BusCurrState == STATE_BUS_READY;
// Would these be better as always_comb statements or muxes?
assign LSUBurstType = (UnCachedRW) ? 3'b0 : LocalBurstType; // Don't want to use burst when doing an Uncached Access.
assign LSUTransComplete = (UnCachedRW) ? LSUBusAck : WordCountFlag & LSUBusAck;
// Use SEQ if not doing first word, NONSEQ if doing the first read/write, and IDLE if finishing up.
assign LSUTransType = (|WordCount) & ~UnCachedRW ? 2'b11 : (LSUBusRead | LSUBusWrite) & (~LSUTransComplete) ? 2'b10 : 2'b00;
// Reset if we aren't initiating a transaction or if we are finishing a transaction.
assign CntReset = BusCurrState == STATE_BUS_READY & ~(DCacheFetchLine | DCacheWriteLine) | LSUTransComplete;
assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((UnCachedAccess & (|LSURWM)) | DCacheFetchLine | DCacheWriteLine)) |
(BusCurrState == STATE_BUS_UNCACHED_WRITE) |
(BusCurrState == STATE_BUS_UNCACHED_READ) |
(BusCurrState == STATE_BUS_FETCH) |
(BusCurrState == STATE_BUS_WRITE);
assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE;
assign UnCachedLSUBusWrite = (BusCurrState == STATE_BUS_READY & UnCachedAccess & LSURWM[0] & ~IgnoreRequest) |
(BusCurrState == STATE_BUS_UNCACHED_WRITE);
assign LSUBusWrite = UnCachedLSUBusWrite | (BusCurrState == STATE_BUS_WRITE);
@ -139,6 +168,10 @@ module busfsm #(parameter integer WordCountThreshold,
(BusCurrState == STATE_BUS_UNCACHED_READ);
assign LSUBusRead = UnCachedLSUBusRead | (BusCurrState == STATE_BUS_FETCH) | (BusCurrState == STATE_BUS_READY & DCacheFetchLine);
// Makes bus only do uncached reads/writes when we actually do uncached reads/writes. Needed because CacheableM is 0 when flushing cache.
assign UnCachedRW = UnCachedLSUBusWrite | UnCachedLSUBusRead;
assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LSUBusAck) |
(BusCurrState == STATE_BUS_WRITE & WordCountFlag & LSUBusAck);
assign BusCommittedM = BusCurrState != STATE_BUS_READY;

View File

@ -66,9 +66,13 @@ module lsu (
(* mark_debug = "true" *) output logic LSUBusRead,
(* mark_debug = "true" *) output logic LSUBusWrite,
(* mark_debug = "true" *) input logic LSUBusAck,
(* mark_debug = "true" *) input logic LSUBusInit,
(* mark_debug = "true" *) input logic [`XLEN-1:0] LSUBusHRDATA,
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUBusHWDATA,
(* mark_debug = "true" *) output logic [2:0] LSUBusSize,
(* mark_debug = "true" *) output logic [2:0] LSUBurstType,
(* mark_debug = "true" *) output logic [1:0] LSUTransType,
(* mark_debug = "true" *) output logic LSUTransComplete,
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // from csr
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV,
@ -211,7 +215,7 @@ module lsu (
busdp #(WORDSPERLINE, LINELEN, LOGWPL, CACHE_ENABLED) busdp(
.clk, .reset,
.LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize,
.LSUBusHRDATA, .LSUBusAck, .LSUBusInit, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
.WordCount, .LSUBusWriteCrit,
.LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine,
.DCacheWriteLine, .DCacheBusAck, .DCacheBusWriteData, .LSUPAdrM,

View File

@ -48,7 +48,7 @@ module gpio (
logic [31:0] input0d, input1d, input2d, input3d;
logic [31:0] input_val, input_en, output_en, output_val;
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip;
logic [31:0] rise_ie, rise_ip, fall_ie, fall_ip, high_ie, high_ip, low_ie, low_ip, out_xor;
logic initTrans, memwrite;
logic [7:0] entry, entryd;
@ -91,6 +91,7 @@ module gpio (
high_ip <= #1 0;
low_ie <= #1 0;
low_ip <= #1 0;
out_xor <= #1 0;
end else begin
// writes
if (memwrite)
@ -104,7 +105,7 @@ module gpio (
8'h20: fall_ie <= #1 Din;
8'h28: high_ie <= #1 Din;
8'h30: low_ie <= #1 Din;
8'h40: output_val <= #1 output_val ^ Din; // OUT_XOR
8'h40: out_xor <= #1 Din;
endcase
/* verilator lint_on CASEINCOMPLETE */
// reads
@ -121,7 +122,7 @@ module gpio (
8'h2C: Dout <= #1 high_ip;
8'h30: Dout <= #1 low_ie;
8'h34: Dout <= #1 low_ip;
8'h40: Dout <= #1 0; // OUT_XOR reads as 0
8'h40: Dout <= #1 out_xor;
default: Dout <= #1 0;
endcase
// interrupts
@ -152,7 +153,7 @@ module gpio (
flop #(32) sync2(HCLK,input1d,input2d);
flop #(32) sync3(HCLK,input2d,input3d);
assign input_val = input3d;
assign GPIOPinsOut = output_val;
assign GPIOPinsOut = output_val ^ out_xor;
assign GPIOPinsEn = output_en;
assign GPIOIntr = |{(rise_ip & rise_ie),(fall_ip & fall_ie),(high_ip & high_ie),(low_ip & low_ie)};

View File

@ -43,77 +43,37 @@ module ram #(parameter BASE=0, RANGE = 65535) (
output logic HRESPRam, HREADYRam
);
// Desired changes.
// 1. find a way to merge read and write address into 1 port.
// 2. remove all unnecessary latencies. (HREADY needs to be able to constant high.)
// 3. implement burst.
// 4. remove the configurable latency.
localparam ADDR_WIDTH = $clog2(RANGE/8);
localparam OFFSET = $clog2(`XLEN/8);
logic [`XLEN/8-1:0] ByteMask;
logic [31:0] HADDRD, RamAddr;
//logic prevHREADYRam, risingHREADYRam;
logic initTrans;
logic memwrite, memwriteD, memread;
logic nextHREADYRam;
//logic [3:0] busycount;
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
// a new AHB transactions starts when HTRANS requests a transaction,
// the peripheral is selected, and the previous transaction is completing
assign initTrans = HREADY & HSELRam & (HTRANS[1]);
assign memwrite = initTrans & HWRITE; // *** why is initTrans needed? See CLINT interface
assign memwrite = initTrans & HWRITE;
assign memread = initTrans & ~HWRITE;
flopenr #(1) memwritereg(HCLK, ~HRESETn, HREADY, memwrite, memwriteD);
flopenr #(32) haddrreg(HCLK, ~HRESETn, HREADY, HADDR, HADDRD);
/* // busy FSM to extend READY signal
always @(posedge HCLK, negedge HRESETn)
if (~HRESETn) begin
busycount <= 0;
HREADYRam <= #1 0;
end else begin
if (initTrans) begin
busycount <= 0;
HREADYRam <= #1 0;
end else if (~HREADYRam) begin
if (busycount == 0) begin // Ram latency, for testing purposes. *** test with different values such as 2
HREADYRam <= #1 1;
end else begin
busycount <= busycount + 1;
end
end
end */
// Stall on a read after a write because the RAM can't take both adddresses on the same cycle
assign nextHREADYRam = ~(memwriteD & memread);
// assign nextHREADYRam = ~(memwriteD & ~memwrite);
flopr #(1) readyreg(HCLK, ~HRESETn, nextHREADYRam, HREADYRam);
// assign HREADYRam = ~(memwriteD & ~memwrite);
assign HRESPRam = 0; // OK
localparam ADDR_WIDTH = $clog2(RANGE/8);
localparam OFFSET = $clog2(`XLEN/8);
/* // Rising HREADY edge detector
// Indicates when ram is finishing up
// Needed because HREADY may go high for other reasons,
// and we only want to write data when finishing up.
flopenr #(1) prevhreadyRamreg(HCLK,~HRESETn, 1'b1, HREADYRam,prevHREADYRam);
assign risingHREADYRam = HREADYRam & ~prevHREADYRam;*/
/*
bram2p1r1w #(`XLEN/8, 8, ADDR_WDITH, `FPGA)
memory(.clk(HCLK), .reA(1'b1),
.addrA(A[ADDR_WDITH+OFFSET-1:OFFSET]), .doutA(HREADRam),
.weB(memwrite & risingHREADYRam), .bweB(ByteMaskM),
.addrB(HWADDR[ADDR_WDITH+OFFSET-1:OFFSET]), .dinB(HWDATA)); */
// On writes or during a wait state, use address delayed by one cycle to sync RamAddr with HWDATA or hold stalled address
mux2 #(32) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr);
// Byte mask for subword writes
// ***the CLINT and other peripherals duplicate this hardware
// *** it shoudl be centralized and sent over HWSTRB
swbytemask swbytemask(.Size(HSIZED[1:0]), .Adr(HADDRD[2:0]), .ByteMask(ByteMask));
// single-ported RAM
bram1p1rw #(`XLEN/8, 8, ADDR_WIDTH)
memory(.clk(HCLK), .we(memwriteD), .bwe(ByteMask), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRam), .din(HWDATA));

View File

@ -93,10 +93,12 @@ module wallypipelinedcore (
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
// memory management unit signals
@ -134,13 +136,16 @@ module wallypipelinedcore (
logic [`PA_BITS-1:0] IFUBusAdr;
logic [`XLEN-1:0] IFUBusHRDATA;
logic IFUBusRead;
logic IFUBusAck;
logic IFUBusAck, IFUBusInit;
logic [2:0] IFUBurstType;
logic [1:0] IFUTransType;
logic IFUTransComplete;
// AHB LSU interface
logic [`PA_BITS-1:0] LSUBusAdr;
logic LSUBusRead;
logic LSUBusWrite;
logic LSUBusAck;
logic LSUBusAck, LSUBusInit;
logic [`XLEN-1:0] LSUBusHRDATA;
logic [`XLEN-1:0] LSUBusHWDATA;
@ -152,6 +157,9 @@ module wallypipelinedcore (
logic [4:0] InstrClassM;
logic InstrAccessFaultF;
logic [2:0] LSUBusSize;
logic [2:0] LSUBurstType;
logic [1:0] LSUTransType;
logic LSUTransComplete;
logic DCacheMiss;
logic DCacheAccess;
@ -166,8 +174,8 @@ module wallypipelinedcore (
.StallF, .StallD, .StallE, .StallM,
.FlushF, .FlushD, .FlushE, .FlushM,
// Fetch
.IFUBusHRDATA, .IFUBusAck, .PCF, .IFUBusAdr,
.IFUBusRead, .IFUStallF,
.IFUBusHRDATA, .IFUBusAck, .IFUBusInit, .PCF, .IFUBusAdr,
.IFUBusRead, .IFUStallF, .IFUBurstType, .IFUTransType, .IFUTransComplete,
.ICacheAccess, .ICacheMiss,
// Execute
@ -224,6 +232,8 @@ module wallypipelinedcore (
.CSRReadValW, .ReadDataM, .MDUResultW,
.RdW, .ReadDataW,
.InstrValidM,
.FCvtIntResW,
.FResSelW,
// hazards
.StallD, .StallE, .StallM, .StallW,
@ -247,8 +257,8 @@ module wallypipelinedcore (
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataM, .FlushDCacheM,
// connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize,
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
// connect to csr or privilege and stay the same.
.PrivilegeModeW, .BigEndianM, // connects to csr
@ -279,13 +289,22 @@ module wallypipelinedcore (
ahblite ebu(// IFU connections
.clk, .reset,
.UnsignedLoadM(1'b0), .AtomicMaskedM(2'b00),
.IFUBusAdr,
.IFUBusRead, .IFUBusHRDATA, .IFUBusAck,
.IFUBusAdr, .IFUBusRead,
.IFUBusHRDATA,
.IFUBurstType,
.IFUTransType,
.IFUTransComplete,
.IFUBusAck,
.IFUBusInit,
// Signals from Data Cache
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusHWDATA,
.LSUBusHRDATA,
.LSUBusSize,
.LSUBurstType,
.LSUTransType,
.LSUTransComplete,
.LSUBusAck,
.LSUBusInit,
.HRDATA, .HREADY, .HRESP, .HCLK, .HRESETn,
.HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST,
@ -375,6 +394,8 @@ module wallypipelinedcore (
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FCvtIntResW, // fp -> int conversion result to be stored in int register
.FResSelW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)

View File

@ -0,0 +1,18 @@
CC = gcc
CFLAGS = -lm
LIBS =
OBJS = disp.o srt4div.o
srt4div: $(OBJS)
$(CC) -g -O3 -o srt4div $(OBJS) $(CFLAGS)
disp.o: disp.h disp.c
$(CC) -g -c -o disp.o disp.c $(CFLAGS)
srt4div.o: srt4div.c
$(CC) -g -c -o srt4div.o srt4div.c $(CFLAGS)
clean:
rm -f *.o *~
rm -f core

60
pipelined/srt/stine/disp.c Executable file
View File

@ -0,0 +1,60 @@
#include "disp.h"
double rnd_zero(double x, double bits) {
if (x < 0)
return ceiling(x, bits);
else
return flr(x, bits);
}
double rne(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = rint(x * scale) / scale;
return x_round;
}
double flr(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = floor(x * scale) / scale;
return x_round;
}
double ceiling(double x, double precision) {
double scale, x_round;
scale = pow(2.0, precision);
x_round = ceil(x * scale) / scale;
return x_round;
}
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file) {
double diff;
int i;
if (fabs(x) < pow(2.0, -bits_to_right)) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
fprintf(out_file,"0");
}
return;
}
if (x < 0.0) {
// fprintf(out_file, "-");
// x = - x;
x = pow(2.0, ((double) bits_to_left)) + x;
}
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, -i);
if (x < diff) {
fprintf(out_file, "0");
}
else {
fprintf(out_file, "1");
x -= diff;
}
if (i == 0) {
fprintf(out_file, ".");
}
}
}

18
pipelined/srt/stine/disp.h Executable file
View File

@ -0,0 +1,18 @@
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#ifndef DISP
#define DISP
double rnd_zero(double x, double bits);
double rne(double x, double precision);
double flr(double x, double precision);
double ceiling(double x, double precision);
void disp_bin(double x, int bits_to_left, int bits_to_right, FILE *out_file);
#endif

View File

@ -195,7 +195,7 @@ module divide4 #(parameter WIDTH=64)
logic [WIDTH:0] Qstar;
logic [WIDTH:0] QMstar;
logic [WIDTH:0] QM2star;
logic [6:0] qtotal;
logic [7:0] qtotal;
logic [WIDTH+3:0] SumN, CarryN, SumN2, CarryN2;
logic [WIDTH+3:0] divi1, divi2, divi1c, divi2c, dive1;
logic [WIDTH+3:0] mdivi_temp, mdivi;
@ -219,9 +219,9 @@ module divide4 #(parameter WIDTH=64)
mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN);
mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN);
// Simplify QST
adder #(7) cpa1 (SumN[WIDTH+3:WIDTH-3], CarryN[WIDTH+3:WIDTH-3], qtotal);
adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal);
// q = {+2, +1, -1, -2} else q = 0
qst4 pd1 (qtotal[6:0], divi1[WIDTH-1:WIDTH-3], quotient);
qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient);
assign ulp = quotient[2]|quotient[3];
assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]);
// Map to binary encoding

BIN
pipelined/srt/stine/srt4div Executable file

Binary file not shown.

325
pipelined/srt/stine/srt4div.c Executable file
View File

@ -0,0 +1,325 @@
#include "disp.h"
#include <math.h>
int qslc (double prem, double d) {
int q;
printf("d --> %lg\n", d);
printf("rw --> %lg\n", prem);
if ((d>=0.0)&&(d<1.0)) {
if (prem>=1.0)
q = 2;
else if (prem>=0.25)
q = 1;
else if (prem>=-0.25)
q = 0;
else if (prem >= -1)
q = -1;
else
q = -2;
return q;
}
if ((d>=1.0)&&(d<2.0)) {
if (prem>=2.0)
q = 2;
else if (prem>=0.66667)
q = 1;
else if (prem>=-0.6667)
q = 0;
else if (prem >= -2)
q = -1;
else
q = -2;
return q;
}
if ((d>=2.0)&&(d<3.0)) {
if (prem>=4.0)
q = 2;
else if (prem>=1.25)
q = 1;
else if (prem>=-1.25)
q = 0;
else if (prem >= -4)
q = -1;
else
q = -2;
return q;
}
if ((d>=3.0)&&(d<4.0)) {
if (prem>=5.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -5)
q = -1;
else
q = -2;
return q;
}
if ((d>=4.0)&&(d<5.0)) {
if (prem>=6.66667)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -6.66667)
q = -1;
else
q = -2;
return q;
}
if ((d>=5.0)&&(d<6.0)) {
if (prem>=8.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -8.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=6.0)&&(d<7.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -10.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=7.0)&&(d<8.0)) {
if (prem>=11.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -11.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=8.0)&&(d<9.0)) {
if (prem>=12.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -12.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=9.0)&&(d<10.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=10.0)&&(d<11.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=11.0)&&(d<12.0)) {
if (prem>=16.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -16.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=12.0)&&(d<13.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=13.0)&&(d<14.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=14.0)&&(d<15.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=15.0)&&(d<16.0)) {
if (prem>=24.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -24.0)
q = -1;
else
q = -2;
return q;
}
}
/*
This routine performs a radix-4 SRT division
algorithm. The user inputs the numerator, the denominator,
and the number of iterations. It assumes that 0.5 <= D < 1.
*/
int main(int argc, char* argv[]) {
double P, N, D, Q, RQ, RD, RREM, scale;
int q;
int num_iter, i;
int prec;
if (argc < 5) {
fprintf(stderr,
"Usage: %s numerator denominator num_iterations prec\n",
argv[0]);
exit(1);
}
sscanf(argv[1],"%lg", &N);
sscanf(argv[2],"%lg", &D);
sscanf(argv[3],"%d", &num_iter);
sscanf(argv[4],"%d", &prec);
// Round to precision
N = rne(N, prec);
D = rne(D, prec);
printf("N = ");
disp_bin(N, 3, prec, stdout);
printf("\n");
printf("D = ");
disp_bin(D, 3, prec, stdout);
printf("\n");
Q = 0;
P = N*0.25;
printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
N, D, N/D, num_iter);
for (scale = 1, i = 0; i < num_iter; i++) {
// Shift by r
scale = scale*0.25;
q = qslc(flr((4*P)*16,3), D*16);
//q = -q;
printf("4*W[n] = ");
disp_bin(4*P,3,prec,stdout);
printf("\n");
printf("q*D = ");
disp_bin(q*D,3,prec,stdout);
printf("\n");
printf("W[n+1] = ");
disp_bin(P ,3,prec,stdout);
printf("\n");
// Recurrence
P = 4*P - q*D;
// OTFC
Q = Q + q*scale;
printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
printf("i = %d, q = %d", i, q);
printf(", Q = ");
disp_bin(Q, 3, prec, stdout);
printf(", W = ");
disp_bin(P, 3, prec, stdout);
printf("\n\n");
}
if (P < 0) {
Q = Q - scale;
P = P + D;
printf("\nCorrecting Negative Remainder\n");
printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
printf("Q = ");
disp_bin(Q, 3, prec, stdout);
printf(", W = ");
disp_bin(P, 3, prec, stdout);
printf("\n");
}
RQ = flr(N/D, (double) prec);
RD = Q*4;
printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
printf("true = ");
disp_bin(RQ, 3, prec, stdout);
printf(", computed = ");
disp_bin(RD, 3, prec, stdout);
printf("\n\n");
printf("REM = %1.18lf \n", P);
printf("REM = ");
disp_bin(P, 3, prec, stdout);
printf("\n\n");
return 0;
}

View File

@ -1,289 +0,0 @@
4000000000000000_4000000000000000_3ff0000000000000
c018000000000000_4000000000000000_c008000000000000
4024000000000000_4000000000000000_4014000000000000
c032000000000000_4000000000000000_c022000000000000
4041000000000000_4000000000000000_4031000000000000
c05c000000000000_4000000000000000_c04c000000000000
406e000000000000_4000000000000000_405e000000000000
c07ffff583a53b8e_4000000000000000_c06ffff583a53b8e
408199999999999a_4000000000000000_407199999999999a
c093333333333333_4000000000000000_c083333333333333
40a028f5c28f5c29_4000000000000000_409028f5c28f5c29
c0b004189374bc6a_4000000000000000_c0a004189374bc6a
40c00068db8bac71_4000000000000000_40b00068db8bac71
c0dd1745d1745d17_4000000000000000_c0cd1745d1745d17
40e5555555555555_4000000000000000_40d5555555555555
c0f999999999999a_4000000000000000_c0e999999999999a
410c71c71c71c71c_4000000000000000_40fc71c71c71c71c
4000000000000000_c018000000000000_bfe5555555555555
c018000000000000_c018000000000000_3ff0000000000000
4024000000000000_c018000000000000_c00aaaaaaaaaaaab
c032000000000000_c018000000000000_4018000000000000
4041000000000000_c018000000000000_c026aaaaaaaaaaab
c05c000000000000_c018000000000000_4032aaaaaaaaaaab
406e000000000000_c018000000000000_c044000000000000
c07ffff583a53b8e_c018000000000000_4055554e57c37d09
408199999999999a_c018000000000000_c067777777777778
c093333333333333_c018000000000000_4079999999999999
40a028f5c28f5c29_c018000000000000_c0858bf258bf258c
c0b004189374bc6a_c018000000000000_40955acb6f46508d
40c00068db8bac71_c018000000000000_c0a555e124ba3b41
c0dd1745d1745d17_c018000000000000_40b364d9364d9365
40e5555555555555_c018000000000000_c0cc71c71c71c71c
c0f999999999999a_c018000000000000_40d1111111111111
410c71c71c71c71c_c018000000000000_c0e2f684bda12f68
4000000000000000_4024000000000000_3fd999999999999a
c018000000000000_4024000000000000_bfe3333333333333
4024000000000000_4024000000000000_3ff0000000000000
c032000000000000_4024000000000000_c00ccccccccccccd
4041000000000000_4024000000000000_401b333333333333
c05c000000000000_4024000000000000_c026666666666666
406e000000000000_4024000000000000_4038000000000000
c07ffff583a53b8e_4024000000000000_c0499991361dc93e
408199999999999a_4024000000000000_405c28f5c28f5c2a
c093333333333333_4024000000000000_c06eb851eb851eb8
40a028f5c28f5c29_4024000000000000_4079db22d0e56042
c0b004189374bc6a_4024000000000000_c089a027525460aa
40c00068db8bac71_4024000000000000_40999a415f45e0b5
c0dd1745d1745d17_4024000000000000_c0a745d1745d1746
40e5555555555555_4024000000000000_40b1111111111111
c0f999999999999a_4024000000000000_c0c47ae147ae147b
410c71c71c71c71c_4024000000000000_40d6c16c16c16c16
4000000000000000_c032000000000000_bfcc71c71c71c71c
c018000000000000_c032000000000000_3fd5555555555555
4024000000000000_c032000000000000_bfe1c71c71c71c72
c032000000000000_c032000000000000_3ff0000000000000
4041000000000000_c032000000000000_c00e38e38e38e38e
c05c000000000000_c032000000000000_4018e38e38e38e39
406e000000000000_c032000000000000_c02aaaaaaaaaaaab
c07ffff583a53b8e_c032000000000000_403c71bdca59fc0c
408199999999999a_c032000000000000_c04f49f49f49f4a0
c093333333333333_c032000000000000_4051111111111111
40a028f5c28f5c29_c032000000000000_c06cba9876543210
c0b004189374bc6a_c032000000000000_407c790f3f086b67
40c00068db8bac71_c032000000000000_c08c7281864da457
c0dd1745d1745d17_c032000000000000_4099dbcc48676f31
40e5555555555555_c032000000000000_c0a2f684bda12f68
c0f999999999999a_c032000000000000_40b6c16c16c16c17
410c71c71c71c71c_c032000000000000_c0c948b0fcd6e9e0
4000000000000000_4041000000000000_3fbe1e1e1e1e1e1e
c018000000000000_4041000000000000_bfc6969696969697
4024000000000000_4041000000000000_3fd2d2d2d2d2d2d3
c032000000000000_4041000000000000_bfe0f0f0f0f0f0f1
4041000000000000_4041000000000000_3ff0000000000000
c05c000000000000_4041000000000000_c00a5a5a5a5a5a5a
406e000000000000_4041000000000000_401c3c3c3c3c3c3c
c07ffff583a53b8e_4041000000000000_c02e1e143faa9268
408199999999999a_4041000000000000_4030909090909091
c093333333333333_4041000000000000_c042121212121212
40a028f5c28f5c29_4041000000000000_405e6b3804d19e6b
c0b004189374bc6a_4041000000000000_c06e25d3e863448b
40c00068db8bac71_4041000000000000_407e1ee37f25085c
c0dd1745d1745d17_4041000000000000_c08b6132a7041b61
40e5555555555555_4041000000000000_4094141414141414
c0f999999999999a_4041000000000000_c0a8181818181818
410c71c71c71c71c_4041000000000000_40bac5701ac5701a
4000000000000000_c05c000000000000_bfa2492492492492
c018000000000000_c05c000000000000_3fbb6db6db6db6db
4024000000000000_c05c000000000000_bfc6db6db6db6db7
c032000000000000_c05c000000000000_3fd4924924924925
4041000000000000_c05c000000000000_bfe36db6db6db6db
c05c000000000000_c05c000000000000_3ff0000000000000
406e000000000000_c05c000000000000_c001249249249249
c07ffff583a53b8e_c05c000000000000_4012491e945e6b2d
408199999999999a_c05c000000000000_c0241d41d41d41d5
c093333333333333_c05c000000000000_4035f15f15f15f16
40a028f5c28f5c29_c05c000000000000_c04277f44c118de6
c0b004189374bc6a_c05c000000000000_40524dd2f1a9fbe7
40c00068db8bac71_c05c000000000000_c062499c689fa081
c0dd1745d1745d17_c05c000000000000_40709f959c427e56
40e5555555555555_c05c000000000000_c088618618618618
c0f999999999999a_c05c000000000000_409d41d41d41d41e
410c71c71c71c71c_c05c000000000000_c0a0410410410410
4000000000000000_406e000000000000_3f91111111111111
c018000000000000_406e000000000000_bfa999999999999a
4024000000000000_406e000000000000_3fb5555555555555
c032000000000000_406e000000000000_bfc3333333333333
4041000000000000_406e000000000000_3fd2222222222222
c05c000000000000_406e000000000000_bfedddddddddddde
406e000000000000_406e000000000000_3ff0000000000000
c07ffff583a53b8e_406e000000000000_c001110b796930d4
408199999999999a_406e000000000000_4012c5f92c5f92c6
c093333333333333_406e000000000000_c0247ae147ae147b
40a028f5c28f5c29_406e000000000000_40313cc1e098ead6
c0b004189374bc6a_406e000000000000_c041156f8c384071
40c00068db8bac71_406e000000000000_40511180ea2e95ce
c0dd1745d1745d17_406e000000000000_c06f07c1f07c1f07
40e5555555555555_406e000000000000_4076c16c16c16c16
c0f999999999999a_406e000000000000_c08b4e81b4e81b4f
410c71c71c71c71c_406e000000000000_409e573ac901e573
4000000000000000_c07ffff583a53b8e_bf8000053e2f1a08
c018000000000000_c07ffff583a53b8e_3f980007dd46a70b
4024000000000000_c07ffff583a53b8e_bfa400068dbae089
c032000000000000_c07ffff583a53b8e_3fb20005e5f4fd48
4041000000000000_c07ffff583a53b8e_bfc1000592120ba8
c05c000000000000_c07ffff583a53b8e_3fdc00092cd26d8d
406e000000000000_c07ffff583a53b8e_bfee0009d49850ce
c07ffff583a53b8e_c07ffff583a53b8e_3ff0000000000000
408199999999999a_c07ffff583a53b8e_c001999f5e009ca2
c093333333333333_c07ffff583a53b8e_401333397dd21f3c
40a028f5c28f5c29_c07ffff583a53b8e_c02028fb0e2a73e4
c0b004189374bc6a_c07ffff583a53b8e_4030041dd2fb6fd0
40c00068db8bac71_c07ffff583a53b8e_c040006e19dd229c
c0dd1745d1745d17_c07ffff583a53b8e_405d174f59ca00c8
40e5555555555555_c07ffff583a53b8e_c065555c52e9780a
c0f999999999999a_c07ffff583a53b8e_407999a1fd1829a6
410c71c71c71c71c_c07ffff583a53b8e_c08c71d06e8ca00d
4000000000000000_408199999999999a_3f7d1745d1745d17
c018000000000000_408199999999999a_bf85d1745d1745d1
4024000000000000_408199999999999a_3f922e8ba2e8ba2e
c032000000000000_408199999999999a_bfa05d1745d1745d
4041000000000000_408199999999999a_3fbee8ba2e8ba2e8
c05c000000000000_408199999999999a_bfc9745d1745d174
406e000000000000_408199999999999a_3fdb45d1745d1745
c07ffff583a53b8e_408199999999999a_bfed173c4921d90c
408199999999999a_408199999999999a_3ff0000000000000
c093333333333333_408199999999999a_c001745d1745d174
40a028f5c28f5c29_408199999999999a_401d61bed61bed61
c0b004189374bc6a_408199999999999a_c02d1eb851eb851d
40c00068db8bac71_408199999999999a_403d180477e6ade4
c0dd1745d1745d17_408199999999999a_c04a723f789854a0
40e5555555555555_408199999999999a_405364d9364d9364
c0f999999999999a_408199999999999a_c06745d1745d1746
410c71c71c71c71c_408199999999999a_4079dbcc48676f30
4000000000000000_c093333333333333_bf6aaaaaaaaaaaab
c018000000000000_c093333333333333_3f74000000000000
4024000000000000_c093333333333333_bf80aaaaaaaaaaab
c032000000000000_c093333333333333_3f9e000000000000
4041000000000000_c093333333333333_bfac555555555556
c05c000000000000_c093333333333333_3fb7555555555556
406e000000000000_c093333333333333_bfc9000000000000
c07ffff583a53b8e_c093333333333333_3fdaaaa1edb45c4c
408199999999999a_c093333333333333_bfed555555555556
c093333333333333_c093333333333333_3ff0000000000000
40a028f5c28f5c29_c093333333333333_c00aeeeeeeeeeeef
c0b004189374bc6a_c093333333333333_401ab17e4b17e4b1
40c00068db8bac71_c093333333333333_c02aab596de8ca12
c0dd1745d1745d17_c093333333333333_40383e0f83e0f83e
40e5555555555555_c093333333333333_c041c71c71c71c72
c0f999999999999a_c093333333333333_4055555555555556
410c71c71c71c71c_c093333333333333_c067b425ed097b42
4000000000000000_40a028f5c28f5c29_3f5faee41e6a7498
c018000000000000_40a028f5c28f5c29_bf67c32b16cfd772
4024000000000000_40a028f5c28f5c29_3f73cd4e930288df
c032000000000000_40a028f5c28f5c29_bf81d260511be196
4041000000000000_40a028f5c28f5c29_3f90d4e930288df1
c05c000000000000_40a028f5c28f5c29_bfabb9079a9d2605
406e000000000000_40a028f5c28f5c29_3fbdb3f5dc83cd4f
c07ffff583a53b8e_40a028f5c28f5c29_bfcfaed9bca398bf
408199999999999a_40a028f5c28f5c29_3fd16cfd7720f354
c093333333333333_40a028f5c28f5c29_bfe30288df0cac5b
40a028f5c28f5c29_40a028f5c28f5c29_3ff0000000000000
c0b004189374bc6a_40a028f5c28f5c29_c00fb70081c635bb
40c00068db8bac71_40a028f5c28f5c29_401fafb3c1f3a182
c0dd1745d1745d17_40a028f5c28f5c29_c02ccd899003afd0
40e5555555555555_40a028f5c28f5c29_40351f42bef1a310
c0f999999999999a_40a028f5c28f5c29_c04958b67ebb907a
410c71c71c71c71c_40a028f5c28f5c29_405c29ae53ecd96a
4000000000000000_c0b004189374bc6a_bf4ff7d0f16c2e0a
c018000000000000_c0b004189374bc6a_3f57f9dcb5112287
4024000000000000_c0b004189374bc6a_bf63fae296e39cc6
c032000000000000_c0b004189374bc6a_3f71fb6587ccd9e5
4041000000000000_c0b004189374bc6a_bf80fba700417875
c05c000000000000_c0b004189374bc6a_3f9bf8d6d33ea848
406e000000000000_c0b004189374bc6a_bfadf853e2556b29
c07ffff583a53b8e_c0b004189374bc6a_3fbff7c677bfebb5
408199999999999a_c0b004189374bc6a_bfc1951951951953
c093333333333333_c0b004189374bc6a_3fd32e4a2a741b9f
40a028f5c28f5c29_c0b004189374bc6a_bfe024d3c19930d9
c0b004189374bc6a_c0b004189374bc6a_3ff0000000000000
40c00068db8bac71_c0b004189374bc6a_c00ff8a272e15ca2
c0dd1745d1745d17_c0b004189374bc6a_401d0fd53890e409
40e5555555555555_c0b004189374bc6a_c0254fe0a0f2c95b
c0f999999999999a_c0b004189374bc6a_4039930d8df024d5
410c71c71c71c71c_c0b004189374bc6a_c04c6a80d6990c7a
4000000000000000_40c00068db8bac71_3f3fff2e4e46e7a8
c018000000000000_40c00068db8bac71_bf47ff62bab52dbe
4024000000000000_40c00068db8bac71_3f53ff7cf0ec50c9
c032000000000000_40c00068db8bac71_bf61ff8a0c07e24f
4041000000000000_40c00068db8bac71_3f70ff909995ab11
c05c000000000000_40c00068db8bac71_bf8bff48847e0ab3
406e000000000000_40c00068db8bac71_3f9dff3b6962792e
c07ffff583a53b8e_40c00068db8bac71_bfafff23d230d9a4
408199999999999a_40c00068db8bac71_3fb1992644a6ff6a
c093333333333333_40c00068db8bac71_bfc332b5622a8afe
40a028f5c28f5c29_40c00068db8bac71_3fd0288bdd4a34fd
c0b004189374bc6a_40c00068db8bac71_bfe003af9fc0ed8b
40c00068db8bac71_40c00068db8bac71_3ff0000000000000
c0dd1745d1745d17_40c00068db8bac71_c00d16872fe35e3c
40e5555555555555_40c00068db8bac71_401554c989849a70
c0f999999999999a_40c00068db8bac71_c02998f1d838b954
410c71c71c71c71c_40c00068db8bac71_403c710cb75b7895
4000000000000000_c0dd1745d1745d17_bf2199999999999a
c018000000000000_c0dd1745d1745d17_3f3a666666666667
4024000000000000_c0dd1745d1745d17_bf46000000000000
c032000000000000_c0dd1745d1745d17_3f53cccccccccccd
4041000000000000_c0dd1745d1745d17_bf62b33333333333
c05c000000000000_c0dd1745d1745d17_3f7ecccccccccccd
406e000000000000_c0dd1745d1745d17_bf80800000000000
c07ffff583a53b8e_c0dd1745d1745d17_3f919993d5347a5b
408199999999999a_c0dd1745d1745d17_bfa35c28f5c28f5d
c093333333333333_c0dd1745d1745d17_3fb51eb851eb851f
40a028f5c28f5c29_c0dd1745d1745d17_bfc1c6a7ef9db22d
c0b004189374bc6a_c0dd1745d1745d17_3fd19e1b089a0275
40c00068db8bac71_c0dd1745d1745d17_bfe19a0cf1800a7c
c0dd1745d1745d17_c0dd1745d1745d17_3ff0000000000000
40e5555555555555_c0dd1745d1745d17_c007777777777777
c0f999999999999a_c0dd1745d1745d17_401c28f5c28f5c2a
410c71c71c71c71c_c0dd1745d1745d17_c02f49f49f49f49f
4000000000000000_40e5555555555555_3f18000000000000
c018000000000000_40e5555555555555_bf22000000000000
4024000000000000_40e5555555555555_3f3e000000000000
c032000000000000_40e5555555555555_bf4b000000000000
4041000000000000_40e5555555555555_3f59800000000000
c05c000000000000_40e5555555555555_bf65000000000000
406e000000000000_40e5555555555555_3f76800000000000
c07ffff583a53b8e_40e5555555555555_bf87fff822bbecab
408199999999999a_40e5555555555555_3f9a666666666667
c093333333333333_40e5555555555555_bfaccccccccccccd
40a028f5c28f5c29_40e5555555555555_3fb83d70a3d70a3e
c0b004189374bc6a_40e5555555555555_bfc80624dd2f1a9f
40c00068db8bac71_40e5555555555555_3fd8009d495182aa
c0dd1745d1745d17_40e5555555555555_bfe5d1745d1745d2
40e5555555555555_40e5555555555555_3ff0000000000000
c0f999999999999a_40e5555555555555_c003333333333334
410c71c71c71c71c_40e5555555555555_4015555555555555
4000000000000000_c0f999999999999a_bf04000000000000
c018000000000000_c0f999999999999a_3f1e000000000000
4024000000000000_c0f999999999999a_bf29000000000000
c032000000000000_c0f999999999999a_3f36800000000000
4041000000000000_c0f999999999999a_bf45400000000000
c05c000000000000_c0f999999999999a_3f51800000000000
406e000000000000_c0f999999999999a_bf62c00000000000
c07ffff583a53b8e_c0f999999999999a_3f73fff972474538
408199999999999a_c0f999999999999a_bf86000000000000
c093333333333333_c0f999999999999a_3f97ffffffffffff
40a028f5c28f5c29_c0f999999999999a_bfa4333333333333
c0b004189374bc6a_c0f999999999999a_3fb4051eb851eb84
40c00068db8bac71_c0f999999999999a_bfc40083126e978d
c0dd1745d1745d17_c0f999999999999a_3fd22e8ba2e8ba2e
40e5555555555555_c0f999999999999a_bfeaaaaaaaaaaaaa
c0f999999999999a_c0f999999999999a_3ff0000000000000
410c71c71c71c71c_c0f999999999999a_c001c71c71c71c71
4000000000000000_410c71c71c71c71c_3ef2000000000000
c018000000000000_410c71c71c71c71c_bf0b000000000000
4024000000000000_410c71c71c71c71c_3f16800000000000
c032000000000000_410c71c71c71c71c_bf24400000000000
4041000000000000_410c71c71c71c71c_3f33200000000000
c05c000000000000_410c71c71c71c71c_bf4f800000000000
406e000000000000_410c71c71c71c71c_3f50e00000000000
c07ffff583a53b8e_410c71c71c71c71c_bf61fffa1a0cf180
408199999999999a_410c71c71c71c71c_3f73ccccccccccce
c093333333333333_410c71c71c71c71c_bf8599999999999a
40a028f5c28f5c29_410c71c71c71c71c_3f922e147ae147ae
c0b004189374bc6a_410c71c71c71c71c_bfa2049ba5e353f8
40c00068db8bac71_410c71c71c71c71c_3fb20075f6fd21ff
c0dd1745d1745d17_410c71c71c71c71c_bfc05d1745d1745d
40e5555555555555_410c71c71c71c71c_3fd8000000000000
c0f999999999999a_410c71c71c71c71c_bfecccccccccccce
410c71c71c71c71c_410c71c71c71c71c_3ff0000000000000

View File

@ -1,145 +0,0 @@
///////////////////////////////////////////
// testbench-imperas.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the Imperas suite
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module testbench();
logic clk;
logic reset, reset_ext;
int test, i, errors, totalerrors;
logic [31:0] sig32[10000:0];
logic [`XLEN-1:0] signature[10000:0];
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [`XLEN-1:0] meminit;
string tests[];
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
logic [`AHBW-1:0] HWDATA;
logic HWRITE;
logic [2:0] HSIZE;
logic [2:0] HBURST;
logic [3:0] HPROT;
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
// pick tests based on modes supported
initial
// tests = {"../../tests/imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremarkcodemod.bare.riscv.memfile", "1000"};
tests = {"../../benchmarks/riscv-coremark/work/coremark.bare.riscv.memfile", "1000"};
string signame, memfilename;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
logic SDCCLK;
logic SDCCmdIn;
logic SDCCmdOut;
logic SDCCmdOE;
logic [3:0] SDCDatIn;
logic HREADY;
logic HSELEXT;
assign SDCmd = 1'bz;
assign SDCDat = 4'bz;
// instantiate device to be tested
assign GPIOPinsIn = 0;
assign UARTSin = 1;
assign HREADYEXT = 1;
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.clk, .reset_ext, .reset(), .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
logic [31:0] InstrW;
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
dut.core.ifu.FinalInstrRawF,
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
dut.core.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
/*
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
dut.core.ifu.icache.controller.FinalInstrRawF,
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
dut.core.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
*/
logic [`XLEN-1:0] PCW;
flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, dut.core.ifu.PCM, PCW);
// initialize tests
integer j;
initial
begin
totalerrors = 0;
// read test vectors into memory
memfilename = tests[0];
$readmemh(memfilename, dut.uncore.ram.ram.RAM);
//for(j=268437955; j < 268566528; j = j+1)
//dut.uncore.ram.RAM[j] = 64'b0;
// ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.addr";
// ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.lab";
//dut.uncore.ram.RAM[268437713]=64'b1;
reset_ext = 1; # 22; reset_ext = 0;
end
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
end
always @(negedge clk)
begin
if (dut.core.priv.priv.ecallM) begin
#20;
$display("Code ended with ebreakM");
$stop;
end
end
initial begin
// $readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory);
// $readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.memory);
$readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
$readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);
end
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */

View File

@ -10,120 +10,64 @@ module testbenchfp;
parameter TEST="none";
string Tests[]; // list of tests to be run
string FmaRneTests[]; // list of FMA round to nearest even tests to run
string FmaRuTests[]; // list of FMA round up tests to run
string FmaRdTests[]; // list of FMA round down tests to run
string FmaRzTests[]; // list of FMA round twords zero
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
logic [2:0] OpCtrl[]; // list of op controls
logic [2:0] Unit[]; // list of units being tested
logic WriteInt[]; // Is being written to integer resgiter
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
logic [1:0] Fmt[]; // list of formats for the other units
logic [1:0] FmaFmt[]; // list of formats for the FMA
logic clk=0;
logic [31:0] TestNum=0; // index for the test
logic [31:0] FmaTestNum=0; // index for the test
logic [31:0] OpCtrlNum=0; // index for OpCtrl
logic [31:0] errors=0; // how many errors
logic [31:0] VectorNum=0; // index for test vector
logic [31:0] FmaVectorNum=0; // index for test vector
logic [31:0] FrmNum=0; // index for rounding mode
logic [`FLEN*4+7:0] TestVectors[46464:0]; // list of test vectors
logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; // list of fma ru test vectors
logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; // list of fma rd test vectors
logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; // list of fma rz test vectors
logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
logic [`FLEN*4+7:0] TestVectors[6133248:0]; // list of test vectors
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
logic [1:0] FmtVal; // value of the current Fmt
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // vlaue of the currnet Unit/OpCtrl/FrmVal
logic WriteIntVal; // value of the current WriteInt
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat
logic [`XLEN-1:0] SrcA; // integer input
logic [`FLEN-1:0] Ans; // correct answer from TestFloat
logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
logic [`FLEN-1:0] Res; // result from other units
logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
logic [4:0] AnsFlg; // correct flags read from testfloat
logic [4:0] FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
logic [4:0] ResFlg; // Result flags
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
logic [`FMTBITS-1:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
logic [4:0] ResFlg, Flg; // Result flags
logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
logic AnsNaN, ResNaN, NaNGood;
logic XSgn, YSgn, ZSgn; // sign of the inputs
logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp;
logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp;
logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp;
logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp;
logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan;
logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan;
logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan;
logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan;
logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
logic XNaN, YNaN, ZNaN; // is the input NaN
logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
logic FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
logic XDenorm, ZDenorm; // is the input denormalized
logic FmaRneXDenorm, FmaRneZDenorm;
logic FmaRzXDenorm, FmaRzZDenorm;
logic FmaRuXDenorm, FmaRuZDenorm;
logic FmaRdXDenorm, FmaRdZDenorm;
logic FmaRnmXDenorm, FmaRnmZDenorm;
logic XInf, YInf, ZInf; // is the input infinity
logic FmaRneXInf, FmaRneYInf, FmaRneZInf;
logic FmaRzXInf, FmaRzYInf, FmaRzZInf;
logic FmaRuXInf, FmaRuYInf, FmaRuZInf;
logic FmaRdXInf, FmaRdYInf, FmaRdZInf;
logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
logic XZero, YZero, ZZero; // is the input zero
logic FmaRneXZero, FmaRneYZero, FmaRneZZero;
logic FmaRzXZero, FmaRzYZero, FmaRzZZero;
logic FmaRuXZero, FmaRuYZero, FmaRuZZero;
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
logic IntZeroE;
logic CvtResSgnE;
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
logic CvtResDenormUfE;
// in-between FMA signals
logic Mult;
logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd;
logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;
logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
logic [`NE+1:0] ProdExpE;
logic AddendStickyE;
logic KillProdE;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE;
logic [3*`NF+5:0] SumE;
logic InvZE;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
///////////////////////////////////////////////////////////////////////////////////////////////
@ -282,15 +226,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
// add the format for the Fma
FmaFmt = {FmaFmt, 2'b11};
Tests = {Tests, f128fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
end
end
end
if (`D_SUPPORTED) begin // if double precision is supported
@ -411,14 +353,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b01};
Tests = {Tests, f64fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
end
end
end
if (`F_SUPPORTED) begin // if single precision being supported
@ -523,14 +464,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b00};
Tests = {Tests, f32fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
end
end
end
if (`ZFH_SUPPORTED) begin // if half precision supported
@ -617,19 +557,18 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b10};
Tests = {Tests, f16fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
// check if nothing is being tested
if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
if (Tests.size() == 0) begin
$display("TEST %s not supported in this configuration", TEST);
$stop;
end
@ -648,26 +587,17 @@ module testbenchfp;
// Read the first test
initial begin
$display("\n\nRunning %s vectors", Tests[TestNum]);
$display("Running FMA precision %d", FmaTestNum);
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the test index to 0
TestNum = 0;
FmaTestNum = 0;
end
// set a the signals for all tests
always_comb FmaFmtVal = FmaFmt[FmaTestNum];
always_comb UnitVal = Unit[TestNum];
always_comb FmtVal = Fmt[TestNum];
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
always_comb WriteIntVal = WriteInt[OpCtrlNum];
always_comb FrmVal = Frm[FrmNum];
assign Mult = OpCtrlVal === 3'b100;
// modify the format signal if only 2 percisions supported
// - 1 for the larger precision
@ -675,61 +605,9 @@ module testbenchfp;
always_comb begin
if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
else ModFmt = FmtVal;
if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
else FmaModFmt = FmaFmtVal;
end
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg),
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
.XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
.X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg),
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
.XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
.X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg),
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
.XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
.X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg),
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
.XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
.X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
.XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
.X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
@ -754,124 +632,30 @@ module testbenchfp;
///////////////////////////////////////////////////////////////////////////////////////////////
// instantiate devices under test
// - one fma for each precison
// - all the units for the other tests (including fma for add/sub/mul)
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
.XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf),
.XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN),
.KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp),
.SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff),
.PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE),
.FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
.XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf),
.XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN),
.KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp),
.SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff),
.PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ),
.FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
.XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf),
.XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN),
.KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp),
.SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff),
.PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU),
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
.XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
.XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
.KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
.SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
.PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
.FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
.XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
.XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
.KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
.SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff),
.PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM),
.FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
.ZExpM(ZExp), .ZDenormM(ZDenorm),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN),
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
// .CvtRes, .CvtFlgE);
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
///////////////////////////////////////////////////////////////////////////////////////////////
//Check if the correct answer and result is a NaN
always_comb begin
case (FmaFmtVal)
4'b11: begin // quad
FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
end
4'b01: begin // double
FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
end
4'b00: begin // single
FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
end
4'b10: begin // half
FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
end
endcase
end
always_comb begin
if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
// an integer output can't be a NaN
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
always_comb begin
// select the result to check
case (UnitVal)
`FMAUNIT: Res = FmaRes;
`DIVUNIT: Res = DivRes;
`FMAUNIT: Res = FpRes;
`DIVUNIT: Res = FpRes;
`CMPUNIT: Res = CmpRes;
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
`CVTFPUNIT: Res = CvtRes;
`CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
`CVTFPUNIT: Res = FpRes;
endcase
// select the flag to check
case (UnitVal)
`FMAUNIT: ResFlg = FmaFlg;
`DIVUNIT: ResFlg = DivFlg;
`FMAUNIT: ResFlg = Flg;
`DIVUNIT: ResFlg = Flg;
`CMPUNIT: ResFlg = CmpFlg;
`CVTINTUNIT: ResFlg = CvtFlg;
`CVTFPUNIT: ResFlg = CvtFlg;
`CVTINTUNIT: ResFlg = Flg;
`CVTFPUNIT: ResFlg = Flg;
endcase
end
// check results on falling edge of clk
@ -1027,117 +757,6 @@ end
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
// - the sign of the NaN does not matter for the opperations being tested
// - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
case (FmaFmtVal)
4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
endcase
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
case (FmtVal)
4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
@ -1221,77 +840,8 @@ end
$stop;
end
// check if the fma tests are correct
if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNE");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
$stop;
end
if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RZ");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
$stop;
end
if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RU");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
$stop;
end
if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RD");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
$stop;
end
if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNM");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
$stop;
end
VectorNum += 1; // increment the vector
FmaVectorNum += 1; // increment the vector
// check to see if there more vectors in this test
// *** fix this so that fma and other run sepratly - re-add fma num
if ((FmaRneVectors[FmaVectorNum][0] === 1'bx &
FmaRzVectors[FmaVectorNum][0] === 1'bx &
FmaRuVectors[FmaVectorNum][0] === 1'bx &
FmaRdVectors[FmaVectorNum][0] === 1'bx &
FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
// increment the test
FmaTestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
FmaVectorNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
$display("Running FMA precision %d", FmaTestNum);
end
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
@ -1299,14 +849,9 @@ end
TestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
VectorNum = 0;
@ -1317,12 +862,7 @@ end
else FrmNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
if(Tests[TestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
@ -1335,89 +875,6 @@ endmodule
module readfmavectors (
input logic clk,
input logic [`FMTBITS-1:0] FmaModFmt, // the modified format
input logic [1:0] FmaFmt, // the format of the FMA inputs
input logic [`FLEN*4+7:0] TestVector, // the test vector
output logic [`FLEN-1:0] Ans, // the correct answer
output logic [4:0] AnsFlg, // the correct flag
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic [`FLEN-1:0] X, Y, Z // inputs
);
logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
always @(posedge clk) begin
#1;
AnsFlg = TestVector[4:0];
case (FmaFmt)
2'b11: begin // quad
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
end
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZDenormE);
endmodule
module readvectors (
input logic clk,
input logic [`FLEN*4+7:0] TestVector,
@ -1451,33 +908,61 @@ module readvectors (
`FMAUNIT:
case (Fmt)
2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
if(OpCtrl === `FMA_OPCTRL) begin
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
end
else begin
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
end
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
end
else begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
end
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
end
else begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
end
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
end
else begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
end
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1532,19 +1017,19 @@ module readvectors (
2'b11: begin // quad
case (OpCtrl[1:0])
2'b11: begin // quad
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1628,12 +1113,12 @@ module readvectors (
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // quad -> long
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // quad -> int
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end

View File

@ -396,6 +396,7 @@ module riscvassertions;
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
@ -418,6 +419,7 @@ module riscvassertions;
//assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
//assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
assert (`DCACHE_LINELENINBITS <= `XLEN*16 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must not exceed 16 words because max AHB burst size is 1");
assert (`DCACHE_LINELENINBITS % 4 == 0) else $error("DCACHE_LINELENINBITS must hold 4, 8, or 16 words");
end
endmodule

View File

@ -2,7 +2,7 @@
`define ADD_OPCTRL 3'b110
`define MUL_OPCTRL 3'b100
`define SUB_OPCTRL 3'b111
`define FADD_OPCTRL 3'b000
`define FMA_OPCTRL 3'b000
`define DIV_OPCTRL 3'b000
`define SQRT_OPCTRL 3'b001
`define LE_OPCTRL 3'b011
@ -21,11 +21,11 @@
`define RU 3'b011
`define RD 3'b010
`define RNM 3'b100
`define FMAUNIT 0
`define FMAUNIT 2
`define DIVUNIT 1
`define CVTINTUNIT 2
`define CVTFPUNIT 3
`define CMPUNIT 4
`define CVTINTUNIT 0
`define CVTFPUNIT 4
`define CMPUNIT 3
string f16rv32cvtint[] = '{
"ui32_to_f16_rne.tv",

View File

@ -40,9 +40,6 @@ string tvpaths[] = '{
"../../addins/embench-iot/bd_speed/src/"
};
// *** make sure these are somewhere
string coremark[] = '{
`COREMARK,
"coremark.bare.riscv"
@ -1105,11 +1102,11 @@ string imperas32f[] = '{
// "rv64i_m/D/d_fdiv_b20-01", // looks like flags
// "rv64i_m/D/d_fdiv_b2-01", // also flags
// "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
"rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b4-01", // flags
"rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b6-01", // flags
"rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b8-01", // flags
// "rv64i_m/D/d_fdiv_b9-01", might be a flag too
"rv64i_m/D/d_feq_b1-01",

View File

@ -332,7 +332,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
redirect -append $filename { echo "\n\n\n//// Critical paths through fma1 ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma1/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fma/fma2/*} -nworst 1 }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }

View File

@ -2,482 +2,482 @@
BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
echo "Creating ui32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
echo "Creating ui32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
echo "Creating ui32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
echo "Creating ui32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
echo "Creating ui64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
echo "Creating ui64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
echo "Creating ui64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
echo "Creating ui64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
echo "Creating i32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
echo "Creating i32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
echo "Creating i32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
echo "Creating i32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
echo "Creating i64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
echo "Creating i64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
echo "Creating i64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
echo "Creating i64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
echo "Creating f16_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
echo "Creating f32_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
echo "Creating f64_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
echo "Creating f128_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
echo "Creating f16_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
echo "Creating f32_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
echo "Creating f64_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
echo "Creating f128_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
echo "Creating f16_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
echo "Creating f32_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
echo "Creating f64_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
echo "Creating f128_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
echo "Creating f16_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
echo "Creating f32_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
echo "Creating f64_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
echo "Creating f128_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
echo "Creating f16_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
echo "Creating f16_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
echo "Creating f16_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
echo "Creating f32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
echo "Creating f32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
echo "Creating f32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
echo "Creating f64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
echo "Creating f64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
echo "Creating f64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
echo "Creating f128_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
echo "Creating f128_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
echo "Creating f128_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
echo "Creating f16_add vectors"
$BUILD/testfloat_gen -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
$BUILD/testfloat_gen -rminMag f16_add > $OUTPUT/f16_add_rz.tv
$BUILD/testfloat_gen -rmax f16_add > $OUTPUT/f16_add_ru.tv
$BUILD/testfloat_gen -rmin f16_add > $OUTPUT/f16_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
echo "Creating f32_add vectors"
$BUILD/testfloat_gen -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
$BUILD/testfloat_gen -rminMag f32_add > $OUTPUT/f32_add_rz.tv
$BUILD/testfloat_gen -rmax f32_add > $OUTPUT/f32_add_ru.tv
$BUILD/testfloat_gen -rmin f32_add > $OUTPUT/f32_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
echo "Creating f64_add vectors"
$BUILD/testfloat_gen -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
$BUILD/testfloat_gen -rminMag f64_add > $OUTPUT/f64_add_rz.tv
$BUILD/testfloat_gen -rmax f64_add > $OUTPUT/f64_add_ru.tv
$BUILD/testfloat_gen -rmin f64_add > $OUTPUT/f64_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
echo "Creating f128_add vectors"
$BUILD/testfloat_gen -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
$BUILD/testfloat_gen -rminMag f128_add > $OUTPUT/f128_add_rz.tv
$BUILD/testfloat_gen -rmax f128_add > $OUTPUT/f128_add_ru.tv
$BUILD/testfloat_gen -rmin f128_add > $OUTPUT/f128_add_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
echo "Creating f16_sub vectors"
$BUILD/testfloat_gen -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
$BUILD/testfloat_gen -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
$BUILD/testfloat_gen -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
$BUILD/testfloat_gen -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
echo "Creating f32_sub vectors"
$BUILD/testfloat_gen -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
$BUILD/testfloat_gen -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
$BUILD/testfloat_gen -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
$BUILD/testfloat_gen -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
echo "Creating f64_sub vectors"
$BUILD/testfloat_gen -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
$BUILD/testfloat_gen -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
$BUILD/testfloat_gen -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
$BUILD/testfloat_gen -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
echo "Creating f128_sub vectors"
$BUILD/testfloat_gen -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
$BUILD/testfloat_gen -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
$BUILD/testfloat_gen -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
$BUILD/testfloat_gen -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
echo "Creating f16_mul vectors"
$BUILD/testfloat_gen -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
$BUILD/testfloat_gen -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
$BUILD/testfloat_gen -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
$BUILD/testfloat_gen -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
echo "Creating f32_mul vectors"
$BUILD/testfloat_gen -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
$BUILD/testfloat_gen -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
$BUILD/testfloat_gen -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
$BUILD/testfloat_gen -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
echo "Creating f64_mul vectors"
$BUILD/testfloat_gen -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
$BUILD/testfloat_gen -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
$BUILD/testfloat_gen -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
$BUILD/testfloat_gen -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
echo "Creating f128_mul vectors"
$BUILD/testfloat_gen -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
$BUILD/testfloat_gen -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
$BUILD/testfloat_gen -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
$BUILD/testfloat_gen -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
echo "Creating f16_div vectors"
$BUILD/testfloat_gen -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
$BUILD/testfloat_gen -rminMag f16_div > $OUTPUT/f16_div_rz.tv
$BUILD/testfloat_gen -rmax f16_div > $OUTPUT/f16_div_ru.tv
$BUILD/testfloat_gen -rmin f16_div > $OUTPUT/f16_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
echo "Creating f32_div vectors"
$BUILD/testfloat_gen -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
$BUILD/testfloat_gen -rminMag f32_div > $OUTPUT/f32_div_rz.tv
$BUILD/testfloat_gen -rmax f32_div > $OUTPUT/f32_div_ru.tv
$BUILD/testfloat_gen -rmin f32_div > $OUTPUT/f32_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
echo "Creating f64_div vectors"
$BUILD/testfloat_gen -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
$BUILD/testfloat_gen -rminMag f64_div > $OUTPUT/f64_div_rz.tv
$BUILD/testfloat_gen -rmax f64_div > $OUTPUT/f64_div_ru.tv
$BUILD/testfloat_gen -rmin f64_div > $OUTPUT/f64_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
echo "Creating f128_div vectors"
$BUILD/testfloat_gen -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
$BUILD/testfloat_gen -rminMag f128_div > $OUTPUT/f128_div_rz.tv
$BUILD/testfloat_gen -rmax f128_div > $OUTPUT/f128_div_ru.tv
$BUILD/testfloat_gen -rmin f128_div > $OUTPUT/f128_div_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
echo "Creating f16_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
echo "Creating f32_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
echo "Creating f64_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
echo "Creating f128_sqrt vectors"
$BUILD/testfloat_gen -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
$BUILD/testfloat_gen -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
$BUILD/testfloat_gen -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
$BUILD/testfloat_gen -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
echo "Creating f16_eq vectors"
$BUILD/testfloat_gen -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
$BUILD/testfloat_gen -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
$BUILD/testfloat_gen -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
$BUILD/testfloat_gen -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
echo "Creating f32_eq vectors"
$BUILD/testfloat_gen -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
$BUILD/testfloat_gen -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
$BUILD/testfloat_gen -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
$BUILD/testfloat_gen -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
echo "Creating f64_eq vectors"
$BUILD/testfloat_gen -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
$BUILD/testfloat_gen -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
$BUILD/testfloat_gen -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
$BUILD/testfloat_gen -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
echo "Creating f128_eq vectors"
$BUILD/testfloat_gen -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
$BUILD/testfloat_gen -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
$BUILD/testfloat_gen -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
$BUILD/testfloat_gen -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
echo "Creating f16_le vectors"
$BUILD/testfloat_gen -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
$BUILD/testfloat_gen -rminMag f16_le > $OUTPUT/f16_le_rz.tv
$BUILD/testfloat_gen -rmax f16_le > $OUTPUT/f16_le_ru.tv
$BUILD/testfloat_gen -rmin f16_le > $OUTPUT/f16_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
echo "Creating f32_le vectors"
$BUILD/testfloat_gen -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
$BUILD/testfloat_gen -rminMag f32_le > $OUTPUT/f32_le_rz.tv
$BUILD/testfloat_gen -rmax f32_le > $OUTPUT/f32_le_ru.tv
$BUILD/testfloat_gen -rmin f32_le > $OUTPUT/f32_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
echo "Creating f64_le vectors"
$BUILD/testfloat_gen -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
$BUILD/testfloat_gen -rminMag f64_le > $OUTPUT/f64_le_rz.tv
$BUILD/testfloat_gen -rmax f64_le > $OUTPUT/f64_le_ru.tv
$BUILD/testfloat_gen -rmin f64_le > $OUTPUT/f64_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
echo "Creating f128_le vectors"
$BUILD/testfloat_gen -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
$BUILD/testfloat_gen -rminMag f128_le > $OUTPUT/f128_le_rz.tv
$BUILD/testfloat_gen -rmax f128_le > $OUTPUT/f128_le_ru.tv
$BUILD/testfloat_gen -rmin f128_le > $OUTPUT/f128_le_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
echo "Creating f16_lt vectors"
$BUILD/testfloat_gen -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
$BUILD/testfloat_gen -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
$BUILD/testfloat_gen -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
$BUILD/testfloat_gen -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
echo "Creating f32_lt vectors"
$BUILD/testfloat_gen -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
$BUILD/testfloat_gen -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
$BUILD/testfloat_gen -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
$BUILD/testfloat_gen -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
echo "Creating f64_lt vectors"
$BUILD/testfloat_gen -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
$BUILD/testfloat_gen -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
$BUILD/testfloat_gen -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
$BUILD/testfloat_gen -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
echo "Creating f128_lt vectors"
$BUILD/testfloat_gen -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
$BUILD/testfloat_gen -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
$BUILD/testfloat_gen -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
$BUILD/testfloat_gen -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
echo "Creating f16_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
echo "Creating f32_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
echo "Creating f64_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
echo "Creating f128_mulAdd vectors"
$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv